blob: ca2cb00e4bc555cebb166716d5ec4465cc17e488 [file] [log] [blame]
Ben Murdochf87a2032010-10-22 12:50:53 +01001#!/usr/bin/env python
2#
Ben Murdoch3ef787d2012-04-12 10:51:47 +01003# Copyright 2012 the V8 project authors. All rights reserved.
Ben Murdochf87a2032010-10-22 12:50:53 +01004# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10# * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following
12# disclaimer in the documentation and/or other materials provided
13# with the distribution.
14# * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived
16# from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import bisect
31import collections
32import ctypes
Ben Murdoche0cee9b2011-05-25 10:26:03 +010033import disasm
Ben Murdochf87a2032010-10-22 12:50:53 +010034import mmap
35import optparse
36import os
37import re
38import subprocess
39import sys
Ben Murdochf87a2032010-10-22 12:50:53 +010040import time
41
42
43USAGE="""usage: %prog [OPTION]...
44
45Analyses V8 and perf logs to produce profiles.
46
47Perf logs can be collected using a command like:
Ben Murdochb8a8cc12014-11-26 15:28:44 +000048 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
Ben Murdochf87a2032010-10-22 12:50:53 +010049 # -R: collect all data
50 # -e cycles: use cpu-cycles event (run "perf list" for details)
51 # -c 10000: write a sample after each 10000 events
52 # -f: force output file overwrite
53 # -i: limit profiling to our process and the kernel
54 # --ll-prof shell flag enables the right V8 logs
55This will produce a binary trace file (perf.data) that %prog can analyse.
56
Ben Murdochb8a8cc12014-11-26 15:28:44 +000057IMPORTANT:
58 The kernel has an internal maximum for events per second, it is 100K by
59 default. That's not enough for "-c 10000". Set it to some higher value:
60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61 You can also make the warning about kernel address maps go away:
62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
63
64We have a convenience script that handles all of the above for you:
65 $ tools/run-llprof.sh ./d8 bench.js
66
Ben Murdochf87a2032010-10-22 12:50:53 +010067Examples:
68 # Print flat profile with annotated disassembly for the 10 top
Ben Murdochda12d292016-06-02 14:46:10 +010069 # symbols. Use default log names.
70 $ %prog --disasm-top=10
Ben Murdochf87a2032010-10-22 12:50:53 +010071
72 # Print flat profile with annotated disassembly for all used symbols.
73 # Use default log names and include kernel symbols into analysis.
74 $ %prog --disasm-all --kernel
75
76 # Print flat profile. Use custom log names.
Ben Murdochda12d292016-06-02 14:46:10 +010077 $ %prog --log=foo.log --trace=foo.data
Ben Murdochf87a2032010-10-22 12:50:53 +010078"""
79
80
Ben Murdochf87a2032010-10-22 12:50:53 +010081JS_ORIGIN = "js"
Ben Murdochda12d292016-06-02 14:46:10 +010082
Ben Murdochf87a2032010-10-22 12:50:53 +010083
Ben Murdochf87a2032010-10-22 12:50:53 +010084class Code(object):
85 """Code object."""
86
Ben Murdochf87a2032010-10-22 12:50:53 +010087 _id = 0
Ben Murdochb8a8cc12014-11-26 15:28:44 +000088 UNKNOWN = 0
89 V8INTERNAL = 1
90 FULL_CODEGEN = 2
91 OPTIMIZED = 3
Ben Murdochf87a2032010-10-22 12:50:53 +010092
93 def __init__(self, name, start_address, end_address, origin, origin_offset):
94 self.id = Code._id
95 Code._id += 1
96 self.name = name
97 self.other_names = None
98 self.start_address = start_address
99 self.end_address = end_address
100 self.origin = origin
101 self.origin_offset = origin_offset
102 self.self_ticks = 0
103 self.self_ticks_map = None
104 self.callee_ticks = None
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000105 if name.startswith("LazyCompile:*"):
106 self.codetype = Code.OPTIMIZED
107 elif name.startswith("LazyCompile:"):
108 self.codetype = Code.FULL_CODEGEN
109 elif name.startswith("v8::internal::"):
110 self.codetype = Code.V8INTERNAL
111 else:
112 self.codetype = Code.UNKNOWN
Ben Murdochf87a2032010-10-22 12:50:53 +0100113
114 def AddName(self, name):
115 assert self.name != name
116 if self.other_names is None:
117 self.other_names = [name]
118 return
119 if not name in self.other_names:
120 self.other_names.append(name)
121
122 def FullName(self):
123 if self.other_names is None:
124 return self.name
125 self.other_names.sort()
126 return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
127
128 def IsUsed(self):
129 return self.self_ticks > 0 or self.callee_ticks is not None
130
131 def Tick(self, pc):
132 self.self_ticks += 1
133 if self.self_ticks_map is None:
134 self.self_ticks_map = collections.defaultdict(lambda: 0)
135 offset = pc - self.start_address
136 self.self_ticks_map[offset] += 1
137
138 def CalleeTick(self, callee):
139 if self.callee_ticks is None:
140 self.callee_ticks = collections.defaultdict(lambda: 0)
141 self.callee_ticks[callee] += 1
142
Ben Murdoch257744e2011-11-30 15:57:28 +0000143 def PrintAnnotated(self, arch, options):
Ben Murdochf87a2032010-10-22 12:50:53 +0100144 if self.self_ticks_map is None:
145 ticks_map = []
146 else:
147 ticks_map = self.self_ticks_map.items()
148 # Convert the ticks map to offsets and counts arrays so that later
149 # we can do binary search in the offsets array.
150 ticks_map.sort(key=lambda t: t[0])
151 ticks_offsets = [t[0] for t in ticks_map]
152 ticks_counts = [t[1] for t in ticks_map]
153 # Get a list of disassembled lines and their addresses.
Ben Murdoch257744e2011-11-30 15:57:28 +0000154 lines = self._GetDisasmLines(arch, options)
Ben Murdochf87a2032010-10-22 12:50:53 +0100155 if len(lines) == 0:
156 return
157 # Print annotated lines.
158 address = lines[0][0]
159 total_count = 0
160 for i in xrange(len(lines)):
161 start_offset = lines[i][0] - address
162 if i == len(lines) - 1:
163 end_offset = self.end_address - self.start_address
164 else:
165 end_offset = lines[i + 1][0] - address
166 # Ticks (reported pc values) are not always precise, i.e. not
167 # necessarily point at instruction starts. So we have to search
168 # for ticks that touch the current instruction line.
169 j = bisect.bisect_left(ticks_offsets, end_offset)
170 count = 0
171 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
172 if offset < start_offset:
173 break
174 count += cnt
175 total_count += count
Ben Murdoch097c5b22016-05-18 11:27:45 +0100176 percent = 100.0 * count / self.self_ticks
177 offset = lines[i][0]
178 if percent >= 0.01:
179 # 5 spaces for tick count
180 # 1 space following
181 # 1 for '|'
182 # 1 space following
183 # 6 for the percentage number, incl. the '.'
184 # 1 for the '%' sign
185 # => 15
186 print "%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1])
Ben Murdochf87a2032010-10-22 12:50:53 +0100187 else:
Ben Murdoch097c5b22016-05-18 11:27:45 +0100188 print "%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1])
Ben Murdochf87a2032010-10-22 12:50:53 +0100189 print
190 assert total_count == self.self_ticks, \
191 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
192
193 def __str__(self):
194 return "%s [0x%x, 0x%x) size: %d origin: %s" % (
195 self.name,
196 self.start_address,
197 self.end_address,
198 self.end_address - self.start_address,
199 self.origin)
200
Ben Murdoch257744e2011-11-30 15:57:28 +0000201 def _GetDisasmLines(self, arch, options):
Ben Murdochda12d292016-06-02 14:46:10 +0100202 if self.origin == JS_ORIGIN:
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100203 inplace = False
Ben Murdoch257744e2011-11-30 15:57:28 +0000204 filename = options.log + ".ll"
Ben Murdochf87a2032010-10-22 12:50:53 +0100205 else:
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100206 inplace = True
207 filename = self.origin
208 return disasm.GetDisasmLines(filename,
209 self.origin_offset,
210 self.end_address - self.start_address,
Ben Murdoch257744e2011-11-30 15:57:28 +0000211 arch,
Ben Murdoche0cee9b2011-05-25 10:26:03 +0100212 inplace)
Ben Murdochf87a2032010-10-22 12:50:53 +0100213
214
215class CodePage(object):
216 """Group of adjacent code objects."""
217
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000218 SHIFT = 20 # 1M pages
Ben Murdochf87a2032010-10-22 12:50:53 +0100219 SIZE = (1 << SHIFT)
220 MASK = ~(SIZE - 1)
221
222 @staticmethod
223 def PageAddress(address):
224 return address & CodePage.MASK
225
226 @staticmethod
227 def PageId(address):
228 return address >> CodePage.SHIFT
229
230 @staticmethod
231 def PageAddressFromId(id):
232 return id << CodePage.SHIFT
233
234 def __init__(self, address):
235 self.address = address
236 self.code_objects = []
237
238 def Add(self, code):
239 self.code_objects.append(code)
240
241 def Remove(self, code):
242 self.code_objects.remove(code)
243
244 def Find(self, pc):
245 code_objects = self.code_objects
246 for i, code in enumerate(code_objects):
247 if code.start_address <= pc < code.end_address:
248 code_objects[0], code_objects[i] = code, code_objects[0]
249 return code
250 return None
251
252 def __iter__(self):
253 return self.code_objects.__iter__()
254
255
256class CodeMap(object):
257 """Code object map."""
258
259 def __init__(self):
260 self.pages = {}
261 self.min_address = 1 << 64
262 self.max_address = -1
263
264 def Add(self, code, max_pages=-1):
265 page_id = CodePage.PageId(code.start_address)
266 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
267 pages = 0
268 while page_id < limit_id:
269 if max_pages >= 0 and pages > max_pages:
270 print >>sys.stderr, \
271 "Warning: page limit (%d) reached for %s [%s]" % (
272 max_pages, code.name, code.origin)
273 break
274 if page_id in self.pages:
275 page = self.pages[page_id]
276 else:
277 page = CodePage(CodePage.PageAddressFromId(page_id))
278 self.pages[page_id] = page
279 page.Add(code)
280 page_id += 1
281 pages += 1
282 self.min_address = min(self.min_address, code.start_address)
283 self.max_address = max(self.max_address, code.end_address)
284
285 def Remove(self, code):
286 page_id = CodePage.PageId(code.start_address)
287 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
288 removed = False
289 while page_id < limit_id:
290 if page_id not in self.pages:
291 page_id += 1
292 continue
293 page = self.pages[page_id]
294 page.Remove(code)
295 removed = True
296 page_id += 1
297 return removed
298
299 def AllCode(self):
300 for page in self.pages.itervalues():
301 for code in page:
302 if CodePage.PageAddress(code.start_address) == page.address:
303 yield code
304
305 def UsedCode(self):
306 for code in self.AllCode():
307 if code.IsUsed():
308 yield code
309
310 def Print(self):
311 for code in self.AllCode():
312 print code
313
314 def Find(self, pc):
315 if pc < self.min_address or pc >= self.max_address:
316 return None
317 page_id = CodePage.PageId(pc)
318 if page_id not in self.pages:
319 return None
320 return self.pages[page_id].Find(pc)
321
322
323class CodeInfo(object):
324 """Generic info about generated code objects."""
325
326 def __init__(self, arch, header_size):
327 self.arch = arch
328 self.header_size = header_size
329
330
Ben Murdoch257744e2011-11-30 15:57:28 +0000331class LogReader(object):
332 """V8 low-level (binary) log reader."""
Ben Murdochf87a2032010-10-22 12:50:53 +0100333
Ben Murdoch257744e2011-11-30 15:57:28 +0000334 _ARCH_TO_POINTER_TYPE_MAP = {
335 "ia32": ctypes.c_uint32,
336 "arm": ctypes.c_uint32,
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100337 "mips": ctypes.c_uint32,
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000338 "x64": ctypes.c_uint64,
339 "arm64": ctypes.c_uint64
Ben Murdoch257744e2011-11-30 15:57:28 +0000340 }
Ben Murdochf87a2032010-10-22 12:50:53 +0100341
Ben Murdoch257744e2011-11-30 15:57:28 +0000342 _CODE_CREATE_TAG = "C"
343 _CODE_MOVE_TAG = "M"
Ben Murdoch257744e2011-11-30 15:57:28 +0000344 _CODE_MOVING_GC_TAG = "G"
345
Ben Murdochda12d292016-06-02 14:46:10 +0100346 def __init__(self, log_name, code_map):
Ben Murdoch257744e2011-11-30 15:57:28 +0000347 self.log_file = open(log_name, "r")
348 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
349 self.log_pos = 0
Ben Murdochf87a2032010-10-22 12:50:53 +0100350 self.code_map = code_map
Ben Murdochf87a2032010-10-22 12:50:53 +0100351
Ben Murdoch257744e2011-11-30 15:57:28 +0000352 self.arch = self.log[:self.log.find("\0")]
353 self.log_pos += len(self.arch) + 1
354 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
355 "Unsupported architecture %s" % self.arch
356 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
Ben Murdochf87a2032010-10-22 12:50:53 +0100357
Ben Murdoch257744e2011-11-30 15:57:28 +0000358 self.code_create_struct = LogReader._DefineStruct([
359 ("name_size", ctypes.c_int32),
360 ("code_address", pointer_type),
361 ("code_size", ctypes.c_int32)])
Ben Murdochf87a2032010-10-22 12:50:53 +0100362
Ben Murdoch257744e2011-11-30 15:57:28 +0000363 self.code_move_struct = LogReader._DefineStruct([
364 ("from_address", pointer_type),
365 ("to_address", pointer_type)])
366
367 self.code_delete_struct = LogReader._DefineStruct([
368 ("address", pointer_type)])
369
Ben Murdoch257744e2011-11-30 15:57:28 +0000370 def ReadUpToGC(self):
371 while self.log_pos < self.log.size():
372 tag = self.log[self.log_pos]
373 self.log_pos += 1
374
375 if tag == LogReader._CODE_MOVING_GC_TAG:
Ben Murdoch257744e2011-11-30 15:57:28 +0000376 return
Ben Murdochf87a2032010-10-22 12:50:53 +0100377
Ben Murdoch257744e2011-11-30 15:57:28 +0000378 if tag == LogReader._CODE_CREATE_TAG:
379 event = self.code_create_struct.from_buffer(self.log, self.log_pos)
380 self.log_pos += ctypes.sizeof(event)
381 start_address = event.code_address
382 end_address = start_address + event.code_size
Ben Murdochda12d292016-06-02 14:46:10 +0100383 name = self.log[self.log_pos:self.log_pos + event.name_size]
384 origin = JS_ORIGIN
Ben Murdoch257744e2011-11-30 15:57:28 +0000385 self.log_pos += event.name_size
386 origin_offset = self.log_pos
387 self.log_pos += event.code_size
Ben Murdochf87a2032010-10-22 12:50:53 +0100388 code = Code(name, start_address, end_address, origin, origin_offset)
389 conficting_code = self.code_map.Find(start_address)
390 if conficting_code:
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100391 if not (conficting_code.start_address == code.start_address and
392 conficting_code.end_address == code.end_address):
393 self.code_map.Remove(conficting_code)
394 else:
395 LogReader._HandleCodeConflict(conficting_code, code)
396 # TODO(vitalyr): this warning is too noisy because of our
397 # attempts to reconstruct code log from the snapshot.
398 # print >>sys.stderr, \
399 # "Warning: Skipping duplicate code log entry %s" % code
400 continue
Ben Murdochf87a2032010-10-22 12:50:53 +0100401 self.code_map.Add(code)
402 continue
403
Ben Murdoch257744e2011-11-30 15:57:28 +0000404 if tag == LogReader._CODE_MOVE_TAG:
405 event = self.code_move_struct.from_buffer(self.log, self.log_pos)
406 self.log_pos += ctypes.sizeof(event)
407 old_start_address = event.from_address
408 new_start_address = event.to_address
Ben Murdochf87a2032010-10-22 12:50:53 +0100409 if old_start_address == new_start_address:
410 # Skip useless code move entries.
411 continue
412 code = self.code_map.Find(old_start_address)
413 if not code:
414 print >>sys.stderr, "Warning: Not found %x" % old_start_address
415 continue
416 assert code.start_address == old_start_address, \
417 "Inexact move address %x for %s" % (old_start_address, code)
418 self.code_map.Remove(code)
419 size = code.end_address - code.start_address
420 code.start_address = new_start_address
421 code.end_address = new_start_address + size
422 self.code_map.Add(code)
423 continue
424
Ben Murdoch257744e2011-11-30 15:57:28 +0000425 assert False, "Unknown tag %s" % tag
Ben Murdochf87a2032010-10-22 12:50:53 +0100426
427 def Dispose(self):
428 self.log.close()
Ben Murdoch257744e2011-11-30 15:57:28 +0000429 self.log_file.close()
430
431 @staticmethod
432 def _DefineStruct(fields):
433 class Struct(ctypes.Structure):
434 _fields_ = fields
435 return Struct
Ben Murdochf87a2032010-10-22 12:50:53 +0100436
437 @staticmethod
438 def _HandleCodeConflict(old_code, new_code):
439 assert (old_code.start_address == new_code.start_address and
440 old_code.end_address == new_code.end_address), \
441 "Conficting code log entries %s and %s" % (old_code, new_code)
Ben Murdochf87a2032010-10-22 12:50:53 +0100442 if old_code.name == new_code.name:
443 return
Ben Murdochf87a2032010-10-22 12:50:53 +0100444 # Code object may be shared by a few functions. Collect the full
445 # set of names.
446 old_code.AddName(new_code.name)
447
448
449class Descriptor(object):
450 """Descriptor of a structure in the binary trace log."""
451
452 CTYPE_MAP = {
453 "u16": ctypes.c_uint16,
454 "u32": ctypes.c_uint32,
455 "u64": ctypes.c_uint64
456 }
457
458 def __init__(self, fields):
459 class TraceItem(ctypes.Structure):
460 _fields_ = Descriptor.CtypesFields(fields)
461
462 def __str__(self):
463 return ", ".join("%s: %s" % (field, self.__getattribute__(field))
464 for field, _ in TraceItem._fields_)
465
466 self.ctype = TraceItem
467
468 def Read(self, trace, offset):
469 return self.ctype.from_buffer(trace, offset)
470
471 @staticmethod
472 def CtypesFields(fields):
473 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
474
475
476# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
477# for the gory details.
478
479
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000480# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
Ben Murdochf87a2032010-10-22 12:50:53 +0100481TRACE_HEADER_DESC = Descriptor([
482 ("magic", "u64"),
483 ("size", "u64"),
484 ("attr_size", "u64"),
485 ("attrs_offset", "u64"),
486 ("attrs_size", "u64"),
487 ("data_offset", "u64"),
488 ("data_size", "u64"),
489 ("event_types_offset", "u64"),
490 ("event_types_size", "u64")
491])
492
493
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000494# Reference: /usr/include/linux/perf_event.h
Ben Murdochf87a2032010-10-22 12:50:53 +0100495PERF_EVENT_ATTR_DESC = Descriptor([
496 ("type", "u32"),
497 ("size", "u32"),
498 ("config", "u64"),
499 ("sample_period_or_freq", "u64"),
500 ("sample_type", "u64"),
501 ("read_format", "u64"),
502 ("flags", "u64"),
503 ("wakeup_events_or_watermark", "u32"),
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000504 ("bp_type", "u32"),
Ben Murdochf87a2032010-10-22 12:50:53 +0100505 ("bp_addr", "u64"),
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000506 ("bp_len", "u64")
Ben Murdochf87a2032010-10-22 12:50:53 +0100507])
508
509
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000510# Reference: /usr/include/linux/perf_event.h
Ben Murdochf87a2032010-10-22 12:50:53 +0100511PERF_EVENT_HEADER_DESC = Descriptor([
512 ("type", "u32"),
513 ("misc", "u16"),
514 ("size", "u16")
515])
516
517
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000518# Reference: kernel/tools/perf/util/event.h
Ben Murdochf87a2032010-10-22 12:50:53 +0100519PERF_MMAP_EVENT_BODY_DESC = Descriptor([
520 ("pid", "u32"),
521 ("tid", "u32"),
522 ("addr", "u64"),
523 ("len", "u64"),
524 ("pgoff", "u64")
525])
526
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000527# Reference: kernel/tools/perf/util/event.h
528PERF_MMAP2_EVENT_BODY_DESC = Descriptor([
529 ("pid", "u32"),
530 ("tid", "u32"),
531 ("addr", "u64"),
532 ("len", "u64"),
533 ("pgoff", "u64"),
534 ("maj", "u32"),
535 ("min", "u32"),
536 ("ino", "u64"),
537 ("ino_generation", "u64"),
538 ("prot", "u32"),
539 ("flags","u32")
540])
Ben Murdochf87a2032010-10-22 12:50:53 +0100541
542# perf_event_attr.sample_type bits control the set of
543# perf_sample_event fields.
544PERF_SAMPLE_IP = 1 << 0
545PERF_SAMPLE_TID = 1 << 1
546PERF_SAMPLE_TIME = 1 << 2
547PERF_SAMPLE_ADDR = 1 << 3
548PERF_SAMPLE_READ = 1 << 4
549PERF_SAMPLE_CALLCHAIN = 1 << 5
550PERF_SAMPLE_ID = 1 << 6
551PERF_SAMPLE_CPU = 1 << 7
552PERF_SAMPLE_PERIOD = 1 << 8
553PERF_SAMPLE_STREAM_ID = 1 << 9
554PERF_SAMPLE_RAW = 1 << 10
555
556
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000557# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
Ben Murdochf87a2032010-10-22 12:50:53 +0100558PERF_SAMPLE_EVENT_BODY_FIELDS = [
559 ("ip", "u64", PERF_SAMPLE_IP),
560 ("pid", "u32", PERF_SAMPLE_TID),
561 ("tid", "u32", PERF_SAMPLE_TID),
562 ("time", "u64", PERF_SAMPLE_TIME),
563 ("addr", "u64", PERF_SAMPLE_ADDR),
564 ("id", "u64", PERF_SAMPLE_ID),
565 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
566 ("cpu", "u32", PERF_SAMPLE_CPU),
567 ("res", "u32", PERF_SAMPLE_CPU),
568 ("period", "u64", PERF_SAMPLE_PERIOD),
569 # Don't want to handle read format that comes after the period and
570 # before the callchain and has variable size.
571 ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
572 # Raw data follows the callchain and is ignored.
573]
574
575
576PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
577
578
579PERF_RECORD_MMAP = 1
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000580PERF_RECORD_MMAP2 = 10
Ben Murdochf87a2032010-10-22 12:50:53 +0100581PERF_RECORD_SAMPLE = 9
582
583
584class TraceReader(object):
585 """Perf (linux-2.6/tools/perf) trace file reader."""
586
587 _TRACE_HEADER_MAGIC = 4993446653023372624
588
589 def __init__(self, trace_name):
590 self.trace_file = open(trace_name, "r")
591 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
592 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
593 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
594 print >>sys.stderr, "Warning: unsupported trace header magic"
595 self.offset = self.trace_header.data_offset
596 self.limit = self.trace_header.data_offset + self.trace_header.data_size
597 assert self.limit <= self.trace.size(), \
598 "Trace data limit exceeds trace file size"
599 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
600 assert self.trace_header.attrs_size != 0, \
601 "No perf event attributes found in the trace"
602 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
603 self.trace_header.attrs_offset)
604 self.sample_event_body_desc = self._SampleEventBodyDesc(
605 perf_event_attr.sample_type)
606 self.callchain_supported = \
607 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
608 if self.callchain_supported:
609 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
610 self.ip_size = ctypes.sizeof(self.ip_struct)
611
612 def ReadEventHeader(self):
613 if self.offset >= self.limit:
614 return None, 0
615 offset = self.offset
616 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
617 self.offset += header.size
618 return header, offset
619
620 def ReadMmap(self, header, offset):
621 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
622 offset + self.header_size)
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000623 # Read null-terminated filename.
Ben Murdochf87a2032010-10-22 12:50:53 +0100624 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000625 offset + header.size]
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000626 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
Ben Murdochf87a2032010-10-22 12:50:53 +0100627 return mmap_info
628
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000629 def ReadMmap2(self, header, offset):
630 mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace,
631 offset + self.header_size)
632 # Read null-terminated filename.
633 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
634 offset + header.size]
635 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
636 return mmap_info
637
Ben Murdochf87a2032010-10-22 12:50:53 +0100638 def ReadSample(self, header, offset):
639 sample = self.sample_event_body_desc.Read(self.trace,
640 offset + self.header_size)
641 if not self.callchain_supported:
642 return sample
643 sample.ips = []
644 offset += self.header_size + ctypes.sizeof(sample)
645 for _ in xrange(sample.nr):
646 sample.ips.append(
647 self.ip_struct.from_buffer(self.trace, offset).value)
648 offset += self.ip_size
649 return sample
650
651 def Dispose(self):
652 self.trace.close()
653 self.trace_file.close()
654
655 def _SampleEventBodyDesc(self, sample_type):
656 assert (sample_type & PERF_SAMPLE_READ) == 0, \
657 "Can't hande read format in samples"
658 fields = [(field, format)
659 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
660 if (bit & sample_type) != 0]
661 return Descriptor(fields)
662
663
664OBJDUMP_SECTION_HEADER_RE = re.compile(
665 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
666OBJDUMP_SYMBOL_LINE_RE = re.compile(
667 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
668OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100669 r"^DYNAMIC SYMBOL TABLE")
670OBJDUMP_SKIP_RE = re.compile(
671 r"^.*ld\.so\.cache$")
Ben Murdochf87a2032010-10-22 12:50:53 +0100672KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
673PERF_KERNEL_ALLSYMS_RE = re.compile(
674 r".*kallsyms.*")
675KERNEL_ALLSYMS_LINE_RE = re.compile(
676 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
677
678
679class LibraryRepo(object):
680 def __init__(self):
681 self.infos = []
682 self.names = set()
683 self.ticks = {}
684
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000685
686 def HasDynamicSymbols(self, filename):
687 if filename.endswith(".ko"): return False
688 process = subprocess.Popen(
689 "%s -h %s" % (OBJDUMP_BIN, filename),
690 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
691 pipe = process.stdout
692 try:
693 for line in pipe:
694 match = OBJDUMP_SECTION_HEADER_RE.match(line)
695 if match and match.group(1) == 'dynsym': return True
696 finally:
697 pipe.close()
698 assert process.wait() == 0, "Failed to objdump -h %s" % filename
699 return False
700
701
Ben Murdochf87a2032010-10-22 12:50:53 +0100702 def Load(self, mmap_info, code_map, options):
703 # Skip kernel mmaps when requested using the fact that their tid
704 # is 0.
705 if mmap_info.tid == 0 and not options.kernel:
706 return True
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100707 if OBJDUMP_SKIP_RE.match(mmap_info.filename):
708 return True
Ben Murdochf87a2032010-10-22 12:50:53 +0100709 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
710 return self._LoadKernelSymbols(code_map)
711 self.infos.append(mmap_info)
712 mmap_info.ticks = 0
713 mmap_info.unique_name = self._UniqueMmapName(mmap_info)
714 if not os.path.exists(mmap_info.filename):
715 return True
716 # Request section headers (-h), symbols (-t), and dynamic symbols
717 # (-T) from objdump.
718 # Unfortunately, section headers span two lines, so we have to
719 # keep the just seen section name (from the first line in each
720 # section header) in the after_section variable.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000721 if self.HasDynamicSymbols(mmap_info.filename):
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000722 dynamic_symbols = "-T"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000723 else:
724 dynamic_symbols = ""
Ben Murdochf87a2032010-10-22 12:50:53 +0100725 process = subprocess.Popen(
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000726 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
Ben Murdochf87a2032010-10-22 12:50:53 +0100727 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
728 pipe = process.stdout
729 after_section = None
730 code_sections = set()
731 reloc_sections = set()
732 dynamic = False
733 try:
734 for line in pipe:
735 if after_section:
736 if line.find("CODE") != -1:
737 code_sections.add(after_section)
738 if line.find("RELOC") != -1:
739 reloc_sections.add(after_section)
740 after_section = None
741 continue
742
743 match = OBJDUMP_SECTION_HEADER_RE.match(line)
744 if match:
745 after_section = match.group(1)
746 continue
747
748 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
749 dynamic = True
750 continue
751
752 match = OBJDUMP_SYMBOL_LINE_RE.match(line)
753 if match:
754 start_address = int(match.group(1), 16)
755 origin_offset = start_address
756 flags = match.group(2)
757 section = match.group(3)
758 if section in code_sections:
759 if dynamic or section in reloc_sections:
760 start_address += mmap_info.addr
761 size = int(match.group(4), 16)
762 name = match.group(5)
763 origin = mmap_info.filename
764 code_map.Add(Code(name, start_address, start_address + size,
765 origin, origin_offset))
766 finally:
767 pipe.close()
768 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
769
770 def Tick(self, pc):
771 for i, mmap_info in enumerate(self.infos):
772 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
773 mmap_info.ticks += 1
774 self.infos[0], self.infos[i] = mmap_info, self.infos[0]
775 return True
776 return False
777
778 def _UniqueMmapName(self, mmap_info):
779 name = mmap_info.filename
780 index = 1
781 while name in self.names:
782 name = "%s-%d" % (mmap_info.filename, index)
783 index += 1
784 self.names.add(name)
785 return name
786
787 def _LoadKernelSymbols(self, code_map):
788 if not os.path.exists(KERNEL_ALLSYMS_FILE):
789 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
790 return False
791 kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
792 code = None
793 for line in kallsyms:
794 match = KERNEL_ALLSYMS_LINE_RE.match(line)
795 if match:
796 start_address = int(match.group(1), 16)
797 end_address = start_address
798 name = match.group(2)
799 if code:
800 code.end_address = start_address
801 code_map.Add(code, 16)
802 code = Code(name, start_address, end_address, "kernel", 0)
803 return True
804
805
Ben Murdoch257744e2011-11-30 15:57:28 +0000806def PrintReport(code_map, library_repo, arch, ticks, options):
Ben Murdochf87a2032010-10-22 12:50:53 +0100807 print "Ticks per symbol:"
808 used_code = [code for code in code_map.UsedCode()]
809 used_code.sort(key=lambda x: x.self_ticks, reverse=True)
810 for i, code in enumerate(used_code):
Ben Murdoch257744e2011-11-30 15:57:28 +0000811 code_ticks = code.self_ticks
812 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
813 code.FullName(), code.origin)
Ben Murdochf87a2032010-10-22 12:50:53 +0100814 if options.disasm_all or i < options.disasm_top:
Ben Murdoch257744e2011-11-30 15:57:28 +0000815 code.PrintAnnotated(arch, options)
Ben Murdochf87a2032010-10-22 12:50:53 +0100816 print
817 print "Ticks per library:"
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000818 mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
Ben Murdochf87a2032010-10-22 12:50:53 +0100819 mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
820 for mmap_info in mmap_infos:
Ben Murdoch257744e2011-11-30 15:57:28 +0000821 mmap_ticks = mmap_info.ticks
822 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
823 mmap_info.unique_name)
Ben Murdochf87a2032010-10-22 12:50:53 +0100824
825
826def PrintDot(code_map, options):
827 print "digraph G {"
828 for code in code_map.UsedCode():
829 if code.self_ticks < 10:
830 continue
831 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
832 if code.callee_ticks:
833 for callee, ticks in code.callee_ticks.iteritems():
834 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
835 print "}"
836
837
838if __name__ == "__main__":
839 parser = optparse.OptionParser(USAGE)
Ben Murdochf87a2032010-10-22 12:50:53 +0100840 parser.add_option("--log",
841 default="v8.log",
842 help="V8 log file name [default: %default]")
Ben Murdochf87a2032010-10-22 12:50:53 +0100843 parser.add_option("--trace",
844 default="perf.data",
845 help="perf trace file name [default: %default]")
846 parser.add_option("--kernel",
847 default=False,
848 action="store_true",
849 help="process kernel entries [default: %default]")
850 parser.add_option("--disasm-top",
851 default=0,
852 type="int",
853 help=("number of top symbols to disassemble and annotate "
854 "[default: %default]"))
855 parser.add_option("--disasm-all",
856 default=False,
857 action="store_true",
858 help=("disassemble and annotate all used symbols "
859 "[default: %default]"))
860 parser.add_option("--dot",
861 default=False,
862 action="store_true",
863 help="produce dot output (WIP) [default: %default]")
864 parser.add_option("--quiet", "-q",
865 default=False,
866 action="store_true",
867 help="no auxiliary messages [default: %default]")
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000868 parser.add_option("--gc-fake-mmap",
869 default="/tmp/__v8_gc__",
870 help="gc fake mmap file [default: %default]")
871 parser.add_option("--objdump",
872 default="/usr/bin/objdump",
873 help="objdump tool to use [default: %default]")
874 parser.add_option("--host-root",
875 default="",
876 help="Path to the host root [default: %default]")
Ben Murdochf87a2032010-10-22 12:50:53 +0100877 options, args = parser.parse_args()
878
879 if not options.quiet:
Ben Murdochda12d292016-06-02 14:46:10 +0100880 print "V8 log: %s, %s.ll" % (options.log, options.log)
Ben Murdochf87a2032010-10-22 12:50:53 +0100881 print "Perf trace file: %s" % options.trace
882
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000883 V8_GC_FAKE_MMAP = options.gc_fake_mmap
884 HOST_ROOT = options.host_root
885 if os.path.exists(options.objdump):
886 disasm.OBJDUMP_BIN = options.objdump
887 OBJDUMP_BIN = options.objdump
888 else:
889 print "Cannot find %s, falling back to default objdump" % options.objdump
890
Ben Murdochf87a2032010-10-22 12:50:53 +0100891 # Stats.
892 events = 0
893 ticks = 0
894 missed_ticks = 0
895 really_missed_ticks = 0
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000896 optimized_ticks = 0
897 generated_ticks = 0
898 v8_internal_ticks = 0
Ben Murdochf87a2032010-10-22 12:50:53 +0100899 mmap_time = 0
900 sample_time = 0
901
Ben Murdoch257744e2011-11-30 15:57:28 +0000902 # Initialize the log reader.
903 code_map = CodeMap()
904 log_reader = LogReader(log_name=options.log + ".ll",
Ben Murdochda12d292016-06-02 14:46:10 +0100905 code_map=code_map)
Ben Murdoch257744e2011-11-30 15:57:28 +0000906 if not options.quiet:
907 print "Generated code architecture: %s" % log_reader.arch
908 print
909 sys.stdout.flush()
Ben Murdochf87a2032010-10-22 12:50:53 +0100910
911 # Process the code and trace logs.
912 library_repo = LibraryRepo()
Ben Murdoch257744e2011-11-30 15:57:28 +0000913 log_reader.ReadUpToGC()
Ben Murdochf87a2032010-10-22 12:50:53 +0100914 trace_reader = TraceReader(options.trace)
915 while True:
916 header, offset = trace_reader.ReadEventHeader()
917 if not header:
918 break
919 events += 1
920 if header.type == PERF_RECORD_MMAP:
921 start = time.time()
922 mmap_info = trace_reader.ReadMmap(header, offset)
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000923 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
Ben Murdoch257744e2011-11-30 15:57:28 +0000924 log_reader.ReadUpToGC()
Ben Murdochf87a2032010-10-22 12:50:53 +0100925 else:
926 library_repo.Load(mmap_info, code_map, options)
927 mmap_time += time.time() - start
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000928 elif header.type == PERF_RECORD_MMAP2:
929 start = time.time()
930 mmap_info = trace_reader.ReadMmap2(header, offset)
931 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
932 log_reader.ReadUpToGC()
933 else:
934 library_repo.Load(mmap_info, code_map, options)
935 mmap_time += time.time() - start
Ben Murdochf87a2032010-10-22 12:50:53 +0100936 elif header.type == PERF_RECORD_SAMPLE:
937 ticks += 1
938 start = time.time()
939 sample = trace_reader.ReadSample(header, offset)
940 code = code_map.Find(sample.ip)
941 if code:
942 code.Tick(sample.ip)
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000943 if code.codetype == Code.OPTIMIZED:
944 optimized_ticks += 1
945 elif code.codetype == Code.FULL_CODEGEN:
946 generated_ticks += 1
947 elif code.codetype == Code.V8INTERNAL:
948 v8_internal_ticks += 1
Ben Murdochf87a2032010-10-22 12:50:53 +0100949 else:
950 missed_ticks += 1
951 if not library_repo.Tick(sample.ip) and not code:
952 really_missed_ticks += 1
953 if trace_reader.callchain_supported:
954 for ip in sample.ips:
955 caller_code = code_map.Find(ip)
956 if caller_code:
957 if code:
958 caller_code.CalleeTick(code)
959 code = caller_code
960 sample_time += time.time() - start
961
962 if options.dot:
963 PrintDot(code_map, options)
964 else:
Ben Murdoch257744e2011-11-30 15:57:28 +0000965 PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
Ben Murdochf87a2032010-10-22 12:50:53 +0100966
967 if not options.quiet:
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000968 def PrintTicks(number, total, description):
969 print("%10d %5.1f%% ticks in %s" %
970 (number, 100.0*number/total, description))
Ben Murdochf87a2032010-10-22 12:50:53 +0100971 print
972 print "Stats:"
973 print "%10d total trace events" % events
974 print "%10d total ticks" % ticks
975 print "%10d ticks not in symbols" % missed_ticks
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000976 unaccounted = "unaccounted ticks"
977 if really_missed_ticks > 0:
978 unaccounted += " (probably in the kernel, try --kernel)"
979 PrintTicks(really_missed_ticks, ticks, unaccounted)
980 PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
981 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
982 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
Ben Murdochf87a2032010-10-22 12:50:53 +0100983 print "%10d total symbols" % len([c for c in code_map.AllCode()])
984 print "%10d used symbols" % len([c for c in code_map.UsedCode()])
985 print "%9.2fs library processing time" % mmap_time
986 print "%9.2fs tick processing time" % sample_time
987
988 log_reader.Dispose()
989 trace_reader.Dispose()