blob: 581278c5848877ea1a90cfdbf8045afd23298c0c [file] [log] [blame]
Janosch Frankf9bc9e62016-05-18 13:26:21 +02001#!/usr/bin/python
2#
3# top-like utility for displaying kvm statistics
4#
5# Copyright 2006-2008 Qumranet Technologies
6# Copyright 2008-2011 Red Hat, Inc.
7#
8# Authors:
9# Avi Kivity <avi@redhat.com>
10#
11# This work is licensed under the terms of the GNU GPL, version 2. See
12# the COPYING file in the top-level directory.
Janosch Frankfabc7122016-05-18 13:26:25 +020013"""The kvm_stat module outputs statistics about running KVM VMs
14
15Three different ways of output formatting are available:
16- as a top-like text ui
17- in a key -> value format
18- in an all keys, all values format
19
20The data is sampled from the KVM's debugfs entries and its perf events.
21"""
Janosch Frankf9bc9e62016-05-18 13:26:21 +020022
23import curses
24import sys
25import os
26import time
27import optparse
28import ctypes
29import fcntl
30import resource
31import struct
32import re
33from collections import defaultdict
34from time import sleep
35
# Exit reason name -> numeric exit reason code for Intel VMX.
# Arch.get_arch() hands this table to ArchX86 when the 'vmx' flag is
# listed in /proc/cpuinfo; get_filters() then uses it as the set of
# values the kvm_exit tracepoint can be filtered on.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI': 0,
    'EXTERNAL_INTERRUPT': 1,
    'TRIPLE_FAULT': 2,
    'PENDING_INTERRUPT': 7,
    'NMI_WINDOW': 8,
    'TASK_SWITCH': 9,
    'CPUID': 10,
    'HLT': 12,
    'INVLPG': 14,
    'RDPMC': 15,
    'RDTSC': 16,
    'VMCALL': 18,
    'VMCLEAR': 19,
    'VMLAUNCH': 20,
    'VMPTRLD': 21,
    'VMPTRST': 22,
    'VMREAD': 23,
    'VMRESUME': 24,
    'VMWRITE': 25,
    'VMOFF': 26,
    'VMON': 27,
    'CR_ACCESS': 28,
    'DR_ACCESS': 29,
    'IO_INSTRUCTION': 30,
    'MSR_READ': 31,
    'MSR_WRITE': 32,
    'INVALID_STATE': 33,
    'MWAIT_INSTRUCTION': 36,
    'MONITOR_INSTRUCTION': 39,
    'PAUSE_INSTRUCTION': 40,
    'MCE_DURING_VMENTRY': 41,
    'TPR_BELOW_THRESHOLD': 43,
    'APIC_ACCESS': 44,
    'EPT_VIOLATION': 48,
    'EPT_MISCONFIG': 49,
    'WBINVD': 54,
    'XSETBV': 55,
    'APIC_WRITE': 56,
    'INVPCID': 58,
}
77
# Exit reason name -> numeric exit reason code for AMD SVM.
# Arch.get_arch() hands this table to ArchX86 when the 'svm' flag is
# listed in /proc/cpuinfo; get_filters() then uses it as the set of
# values the kvm_exit tracepoint can be filtered on.
SVM_EXIT_REASONS = {
    'READ_CR0': 0x000,
    'READ_CR3': 0x003,
    'READ_CR4': 0x004,
    'READ_CR8': 0x008,
    'WRITE_CR0': 0x010,
    'WRITE_CR3': 0x013,
    'WRITE_CR4': 0x014,
    'WRITE_CR8': 0x018,
    'READ_DR0': 0x020,
    'READ_DR1': 0x021,
    'READ_DR2': 0x022,
    'READ_DR3': 0x023,
    'READ_DR4': 0x024,
    'READ_DR5': 0x025,
    'READ_DR6': 0x026,
    'READ_DR7': 0x027,
    'WRITE_DR0': 0x030,
    'WRITE_DR1': 0x031,
    'WRITE_DR2': 0x032,
    'WRITE_DR3': 0x033,
    'WRITE_DR4': 0x034,
    'WRITE_DR5': 0x035,
    'WRITE_DR6': 0x036,
    'WRITE_DR7': 0x037,
    'EXCP_BASE': 0x040,
    'INTR': 0x060,
    'NMI': 0x061,
    'SMI': 0x062,
    'INIT': 0x063,
    'VINTR': 0x064,
    'CR0_SEL_WRITE': 0x065,
    'IDTR_READ': 0x066,
    'GDTR_READ': 0x067,
    'LDTR_READ': 0x068,
    'TR_READ': 0x069,
    'IDTR_WRITE': 0x06a,
    'GDTR_WRITE': 0x06b,
    'LDTR_WRITE': 0x06c,
    'TR_WRITE': 0x06d,
    'RDTSC': 0x06e,
    'RDPMC': 0x06f,
    'PUSHF': 0x070,
    'POPF': 0x071,
    'CPUID': 0x072,
    'RSM': 0x073,
    'IRET': 0x074,
    'SWINT': 0x075,
    'INVD': 0x076,
    'PAUSE': 0x077,
    'HLT': 0x078,
    'INVLPG': 0x079,
    'INVLPGA': 0x07a,
    'IOIO': 0x07b,
    'MSR': 0x07c,
    'TASK_SWITCH': 0x07d,
    'FERR_FREEZE': 0x07e,
    'SHUTDOWN': 0x07f,
    'VMRUN': 0x080,
    'VMMCALL': 0x081,
    'VMLOAD': 0x082,
    'VMSAVE': 0x083,
    'STGI': 0x084,
    'CLGI': 0x085,
    'SKINIT': 0x086,
    'RDTSCP': 0x087,
    'ICEBP': 0x088,
    'WBINVD': 0x089,
    'MONITOR': 0x08a,
    'MWAIT': 0x08b,
    'MWAIT_COND': 0x08c,
    'XSETBV': 0x08d,
    'NPF': 0x400,
}
152
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Maps the exception class name to its numeric value.  ArchA64 exposes
# this table as its exit_reasons, which get_filters() uses as the set
# of values the kvm_exit tracepoint can be filtered on.
AARCH64_EXIT_REASONS = {
    'UNKNOWN': 0x00,
    'WFI': 0x01,
    'CP15_32': 0x03,
    'CP15_64': 0x04,
    'CP14_MR': 0x05,
    'CP14_LS': 0x06,
    'FP_ASIMD': 0x07,
    'CP10_ID': 0x08,
    'CP14_64': 0x0C,
    'ILL_ISS': 0x0E,
    'SVC32': 0x11,
    'HVC32': 0x12,
    'SMC32': 0x13,
    'SVC64': 0x15,
    'HVC64': 0x16,
    'SMC64': 0x17,
    'SYS64': 0x18,
    'IABT': 0x20,
    'IABT_HYP': 0x21,
    'PC_ALIGN': 0x22,
    'DABT': 0x24,
    'DABT_HYP': 0x25,
    'SP_ALIGN': 0x26,
    'FP_EXC32': 0x28,
    'FP_EXC64': 0x2C,
    'SERROR': 0x2F,
    'BREAKPT': 0x30,
    'BREAKPT_HYP': 0x31,
    'SOFTSTP': 0x32,
    'SOFTSTP_HYP': 0x33,
    'WATCHPT': 0x34,
    'WATCHPT_HYP': 0x35,
    'BKPT32': 0x38,
    'VECTOR32': 0x3A,
    'BRK64': 0x3C,
}
191
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Maps the exit reason name to its numeric value.  get_filters() uses
# this table, on every architecture, as the set of 'reason' values the
# kvm_userspace_exit tracepoint can be filtered on.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN': 0,
    'EXCEPTION': 1,
    'IO': 2,
    'HYPERCALL': 3,
    'DEBUG': 4,
    'HLT': 5,
    'MMIO': 6,
    'IRQ_WINDOW_OPEN': 7,
    'SHUTDOWN': 8,
    'FAIL_ENTRY': 9,
    'INTR': 10,
    'SET_TPR': 11,
    'TPR_ACCESS': 12,
    'S390_SIEIC': 13,
    'S390_RESET': 14,
    'DCR': 15,
    'NMI': 16,
    'INTERNAL_ERROR': 17,
    'OSI': 18,
    'PAPR_HCALL': 19,
    'S390_UCONTROL': 20,
    'WATCHDOG': 21,
    'S390_TSCH': 22,
    'EPR': 23,
    'SYSTEM_EVENT': 24,
}
220
# ioctl request numbers issued on perf event file descriptors, keyed
# by operation.  The Event class looks them up through
# ARCH.ioctl_numbers; ArchPPC replaces all four values with its own.
IOCTL_NUMBERS = {
    'SET_FILTER': 0x40082406,
    'ENABLE': 0x00002400,
    'DISABLE': 0x00002401,
    'RESET': 0x00002403,
}
227
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance matching the host.

        The machine name reported by os.uname() selects ArchPPC,
        ArchA64 or ArchS390.  Every other machine is treated as x86:
        the first 'flags' line of /proc/cpuinfo decides between the
        VMX and the SVM exit reason table.

        Returns None if that line lists neither 'vmx' nor 'svm', or if
        /proc/cpuinfo has no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The with statement closes /proc/cpuinfo on every return path.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is inspected.
                return None
        return None
257
class ArchX86(Arch):
    """Architecture data for x86 hosts.

    The exit reason table is passed in by the caller, as it differs
    between VMX and SVM.

    """
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 298
263
class ArchPPC(Arch):
    """Architecture data for PowerPC hosts.

    PPC uses its own ioctl numbers and provides no exit reason table.

    """
    def __init__(self):
        self.sc_perf_evt_open = 319
        # Work on a private copy: writing the PPC values into the
        # module-level IOCTL_NUMBERS dict would change it for every
        # other user of that table.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
        self.exit_reasons = {}
Janosch Frankf9bc9e62016-05-18 13:26:21 +0200277
class ArchA64(Arch):
    """Architecture data for aarch64 hosts."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 241
283
class ArchS390(Arch):
    """Architecture data for s390 hosts; no exit reason table is set."""
    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 331
289
# Architecture data for the host, determined once at import time.
# Arch.get_arch() reads os.uname() and, on x86, /proc/cpuinfo; it can
# return None when no known virtualization flag is found.
ARCH = Arch.get_arch()
291
292
def walkdir(path):
    """Returns the first os.walk() entry for the specified directory.

    The result is the usual 3-tuple of (dirpath, dirnames, filenames)
    describing the directory itself, without descending any further.
    """
    walker = os.walk(path)
    return next(walker)
300
301
def parse_int_list(list_string):
    """Returns an int list from a string of comma separated integers and
    integer ranges.

    A range is written as 'first-last' and is expanded inclusively, so
    '0-2,5' yields [0, 1, 2, 5].  Surrounding whitespace is ignored and
    empty members (an empty string, a trailing comma) are skipped
    instead of raising.

    Raises ValueError if a member is not an integer or integer range.
    """
    integers = []

    for member in list_string.split(','):
        member = member.strip()
        if not member:
            # Nothing between two separators, or an empty input.
            continue

        if '-' not in member:
            integers.append(int(member))
        else:
            int_range = member.split('-')
            integers.extend(range(int(int_range[0]),
                                  int(int_range[1]) + 1))

    return integers
317
318
def get_online_cpus():
    """Returns the ids of all online cpus as a list of integers.

    The ids are parsed from the first line of
    /sys/devices/system/cpu/online.
    """
    with open('/sys/devices/system/cpu/online') as online_file:
        first_line = online_file.readline()
    return parse_int_list(first_line)
324
325
def get_filters():
    """Returns a dict that maps a trace event name to a tuple of its
    filter identifier and the dict of values it can be filtered on.

    Trace events can be filtered for special values by setting a
    filter string via an ioctl. The string normally has the format
    identifier==value. For each filter a new event will be created, to
    be able to distinguish the events.

    The kvm_exit entry is only present if the architecture provides
    exit reasons.

    """
    filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}

    arch_exit_reasons = ARCH.exit_reasons
    if arch_exit_reasons:
        filters['kvm_exit'] = ('exit_reason', arch_exit_reasons)

    return filters
341
# Handle on the C library, loaded with use_errno=True so that
# ctypes.get_errno() can be queried after a failed call.
libc = ctypes.CDLL('libc.so.6', use_errno=True)
# Generic syscall entry point; perf_event_open() calls it with the
# architecture specific syscall number.
syscall = libc.syscall
344
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse forever as soon as this class gets subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
372
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Wrapper for the sys_perf_evt_open() syscall.

    Used to set up performance events, returns a file descriptor or -1
    on error.

    The arguments handed to the syscall are:
    - syscall number of the running architecture
    - struct perf_event_attr *
    - pid or -1 to monitor all pids
    - cpu number or -1 to monitor all cpus
    - The file descriptor of the group leader or -1 to create a group.
    - flags

    """
    syscall_args = (
        ARCH.sc_perf_evt_open,
        ctypes.pointer(attr),
        ctypes.c_int(pid),
        ctypes.c_int(cpu),
        ctypes.c_int(group_fd),
        ctypes.c_long(flags),
    )
    return syscall(*syscall_args)
391
# Value stored in perf_event_attr.type for every event of this script.
PERF_TYPE_TRACEPOINT = 2
# Value stored in perf_event_attr.read_format; Group.read() relies on
# the resulting layout of a count followed by one value per event.
PERF_FORMAT_GROUP = 1 << 3

# Directory below which the tracepoint event directories are looked up.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
# Directory whose files and sub directories DebugfsProvider reads.
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
397
class Group(object):
    """Represents a perf event group; the first added event leads it."""

    def __init__(self):
        self.events = []

    def add_event(self, event):
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        A single read on the file descriptor of the group leader
        delivers the values of all events. See perf_event_open(2) for
        details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [event.name for event in self.events]
        # The eight pad bytes skip 'nr', one 'Q' decodes each value.
        layout = 'xxxxxxxx' + 'Q' * count
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        values = struct.unpack(layout, raw)
        return dict(zip(names, values))
429
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # Compare against None: 0 is a valid file descriptor as well.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            # Forget the descriptor first, so it is never closed twice.
            self.fd = None
            os.close(fd)

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The config field is set to the tracepoint id read from
        events/<trace_set>/<trace_point>/id below the tracing debugfs
        directory.

        """
        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.

        """
        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, trace_pid,
                             trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        if trace_filter:
            try:
                fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                            trace_filter)
            except Exception:
                # self.fd is not set yet, so __del__ would not close
                # the descriptor; close it here before passing on the
                # error.
                os.close(fd)
                raise

        self.fd = fd

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
512
class TracepointProvider(object):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self._pid = 0

    def get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                extra.extend(field + '(' + name + ')'
                             for name in filter_dicts)
        fields += extra
        return fields

    def setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data.

        One group is created per thread of the monitored pid, or per
        online cpu if no pid is monitored. Each group holds an event
        for every available field, so that a later change of the
        fields filter only has to enable or disable existing events.

        Exits the program if the open files limit can not be raised.

        """
        fields = self.get_available_fields()
        per_process = self._pid > 0

        if per_process:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = walkdir(path)[1]
        else:
            groupids = get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
            unlimited = resource.RLIM_INFINITY

            if hardlim != unlimited and hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            elif softlim != unlimited and softlim < newlim:
                # Raising the soft limit is sufficient. A limit that
                # is already high enough is never lowered.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        filter_pattern = re.compile(r'(.*)\((.*)\)')
        for groupid in groupids:
            # From perf_event_open(2):
            # pid > 0 and cpu == -1
            # This measures the specified process/thread on any CPU.
            #
            # pid == -1 and cpu >= 0
            # This measures all processes/threads on the specified CPU.
            if per_process:
                trace_cpu, trace_pid = -1, int(groupid)
            else:
                trace_cpu, trace_pid = groupid, -1

            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                match = filter_pattern.match(name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    def available_fields(self):
        return self.get_available_fields()

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        wanted = set(fields)
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in wanted:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events.

        The values of events with the same name are summed up over all
        groups.

        """
        wanted = set(self._fields)
        ret = defaultdict(int)
        for group in self.group_leaders:
            # items() works on Python 2 and 3 alike.
            for name, val in group.read().items():
                if name in wanted:
                    ret[name] += val
        return ret
652
class DebugfsProvider(object):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self):
        self._fields = self.get_available_fields()
        self._pid = 0
        self.do_read = True
        # Sub directories to read from; '' is the kvm debugfs folder
        # itself.
        self.paths = ['']

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Restricts reading to the debugfs directories of one pid.

        A pid of 0 selects the files of the kvm debugfs folder itself.

        """
        self._pid = pid
        if pid != 0:
            vms = walkdir(PATH_DEBUGFS_KVM)[1]
            # Without any VM directory there is nothing to read from.
            self.do_read = bool(vms)

            # Match on the directory name prefix: a substring test
            # would let pid 12 select the directory '112-3' as well.
            # NOTE(review): presumes the directories are named
            # '<pid>-<fd>' - confirm against the running kernel.
            prefix = '{0}-'.format(pid)
            # A real list, as read() iterates over it on every call.
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = ['']
            self.do_read = True

    def read(self):
        """Returns a dict with format:'file name / field -> current value'."""
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        for path in self.paths:
            for field in self._fields:
                results[field] = results.get(field, 0) \
                                 + self.read_field(field, path)

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file can not be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM,
                                   path,
                                   field)) as field_file:
                return int(field_file.read())
        except IOError:
            return 0
Janosch Frankf9bc9e62016-05-18 13:26:21 +0200720
class Stats(object):
    """Collects the data of all providers and hands filters to them.

    Both the fields filter and the pid filter are passed on to every
    provider whenever they change.

    """
    def __init__(self, providers, pid, fields=None):
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Propagates fields filters to providers."""
        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            selected = []
            for key in provider.get_available_fields():
                pattern = self._fields_filter
                # An unset filter lets every field pass.
                if not pattern or re.match(pattern, key) is not None:
                    selected.append(key)
            provider.fields = selected

    def update_provider_pid(self):
        """Propagates pid filters to providers."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        self._pid_filter = pid
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data."""
        for provider in self.providers:
            sample = provider.read()
            for key in provider.fields:
                previous = self.values.get(key, (0, 0))
                current = sample.get(key, 0)
                if previous is not None:
                    delta = current - previous[0]
                else:
                    delta = None
                self.values[key] = (current, delta)
        return self.values
788
# Column widths, in characters, that Tui.refresh() uses to place the
# event name column and the total value column.
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
791
class Tui(object):
    """Instruments curses to draw a nice text ui."""
    def __init__(self, stats):
        self.stats = stats
        self.screen = None
        self.drilldown = False
        self.update_drilldown()

    def __enter__(self):
        """Initialises curses for later use. Based on curses.wrapper
        implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        # Only curses errors are ignored, so that an interrupt or a
        # request to exit still gets through.
        try:
            curses.start_color()
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state. Based on curses.wrapper
        implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    def update_drilldown(self):
        """Sets or removes a filter that only allows fields without braces.

        A fields filter other than that one is left untouched.

        """
        if not self.stats.fields_filter:
            self.stats.fields_filter = r'^[^\(]*$'

        elif self.stats.fields_filter == r'^[^\(]*$':
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.stats.pid_filter = pid

    def refresh(self, sleeptime):
        """Refreshes on-screen data.

        The delta of each field is divided by sleeptime before it is
        shown in the 'Current' column.

        """
        self.screen.erase()
        if self.stats.pid_filter > 0:
            self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
                               .format(self.stats.pid_filter),
                               curses.A_BOLD)
        else:
            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
        self.screen.addstr(2, 1, 'Event')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
                           len('Total'), 'Total')
        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
                           len('Current'), 'Current')
        row = 3
        stats = self.stats.get()

        def sortkey(x):
            # Largest delta first, ties are broken by the largest total.
            if stats[x][1]:
                return (-stats[x][1], -stats[x][0])
            else:
                return (0, -stats[x][0])

        for key in sorted(stats.keys(), key=sortkey):

            if row >= self.screen.getmaxyx()[0]:
                break
            values = stats[key]
            # Stop at the first field that has neither a total nor a
            # delta.
            if not values[0] and not values[1]:
                break
            col = 1
            self.screen.addstr(row, col, key)
            col += LABEL_WIDTH
            self.screen.addstr(row, col, '%10d' % (values[0],))
            col += NUMBER_WIDTH
            if values[1] is not None:
                self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
            row += 1
        self.screen.refresh()

    def show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.
        An empty input leaves the current filter in place.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr()
            curses.noecho()
            if not isinstance(regex, str):
                # Python 3 hands back bytes, but the field names the
                # regex is matched against are str.
                regex = regex.decode('utf-8', 'replace')
            if len(regex) == 0:
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                return
            except re.error:
                continue

    def show_vm_selection(self):
        """Draws PID selection mask.

        Asks for a pid until a valid pid or 0 has been entered.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')

            curses.echo()
            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
            pid = self.screen.getstr()
            curses.noecho()

            try:
                pid = int(pid)

                if pid == 0:
                    self.update_pid(pid)
                    break
                else:
                    # Only accept pids that have an entry in /proc.
                    if not os.path.isdir(os.path.join('/proc/', str(pid))):
                        continue
                    else:
                        self.update_pid(pid)
                        break

            except ValueError:
                continue

    def show_stats(self):
        """Refreshes the screen and processes user input.

        Keys: 'x' toggles the drilldown, 'f' asks for a fields filter,
        'p' asks for a pid and 'q' quits.

        """
        sleeptime = 0.25
        while True:
            self.refresh(sleeptime)
            curses.halfdelay(int(sleeptime * 10))
            # Only the very first refresh uses the short interval.
            sleeptime = 3
            try:
                char = self.screen.getkey()
                if char == 'x':
                    self.drilldown = not self.drilldown
                    self.update_drilldown()
                if char == 'q':
                    break
                if char == 'f':
                    self.show_filter_selection()
                if char == 'p':
                    self.show_vm_selection()
            except KeyboardInterrupt:
                break
            except curses.error:
                # No key was pressed within the delay.
                continue
962
def batch(stats):
    """Prints statistics in a key, value format.

    The statistics are sampled twice, one second apart, so that the
    printed delta covers that second. Each line shows the field name,
    its value and its delta.

    """
    # The first sample only provides the base for the deltas.
    stats.get()
    time.sleep(1)
    s = stats.get()
    for key in sorted(s.keys()):
        values = s[key]
        # A single parenthesized argument prints the same on Python 2
        # and Python 3.
        print('%-42s%10d%10d' % (key, values[0], values[1]))
971
def log(stats):
    """Prints statistics as reiterating key block, multiple value blocks.

    Once per second a line with the deltas of all fields is printed;
    the line with the field names is repeated every 20 lines. This
    function does not return.

    """
    keys = sorted(stats.get())

    # Both helpers pass one joined string to print(), which gives the
    # same output on Python 2 and Python 3.
    def banner():
        print(' '.join('%s' % k for k in keys))

    def statline():
        s = stats.get()
        print(' '.join(' %9d' % s[k][1] for k in keys))

    line = 0
    banner_repeat = 20
    while True:
        time.sleep(1)
        if line % banner_repeat == 0:
            banner()
        statline()
        line += 1
992
993def get_options():
Janosch Frankfabc7122016-05-18 13:26:25 +0200994 """Returns processed program arguments."""
Janosch Frankf9bc9e62016-05-18 13:26:21 +0200995 description_text = """
996This script displays various statistics about VMs running under KVM.
997The statistics are gathered from the KVM debugfs entries and / or the
998currently available perf traces.
999
1000The monitoring takes additional cpu cycles and might affect the VM's
1001performance.
1002
1003Requirements:
1004- Access to:
1005 /sys/kernel/debug/kvm
1006 /sys/kernel/debug/trace/events/*
1007 /proc/pid/task
1008- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
1009 CAP_SYS_ADMIN and perf events are used.
1010- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
1011 the large number of files that are possibly opened.
1012"""
1013
1014 class PlainHelpFormatter(optparse.IndentedHelpFormatter):
1015 def format_description(self, description):
1016 if description:
1017 return description + "\n"
1018 else:
1019 return ""
1020
1021 optparser = optparse.OptionParser(description=description_text,
1022 formatter=PlainHelpFormatter())
1023 optparser.add_option('-1', '--once', '--batch',
1024 action='store_true',
1025 default=False,
1026 dest='once',
1027 help='run in batch mode for one second',
1028 )
1029 optparser.add_option('-l', '--log',
1030 action='store_true',
1031 default=False,
1032 dest='log',
1033 help='run in logging mode (like vmstat)',
1034 )
1035 optparser.add_option('-t', '--tracepoints',
1036 action='store_true',
1037 default=False,
1038 dest='tracepoints',
1039 help='retrieve statistics from tracepoints',
1040 )
1041 optparser.add_option('-d', '--debugfs',
1042 action='store_true',
1043 default=False,
1044 dest='debugfs',
1045 help='retrieve statistics from debugfs',
1046 )
1047 optparser.add_option('-f', '--fields',
1048 action='store',
1049 default=None,
1050 dest='fields',
1051 help='fields to display (regex)',
1052 )
Janosch Frankf0cf0402016-05-18 13:26:24 +02001053 optparser.add_option('-p', '--pid',
1054 action='store',
1055 default=0,
1056 type=int,
1057 dest='pid',
1058 help='restrict statistics to pid',
1059 )
Janosch Frankf9bc9e62016-05-18 13:26:21 +02001060 (options, _) = optparser.parse_args(sys.argv)
1061 return options
1062
def get_providers(options):
    """Build the list of data providers selected by the options.

    A TracepointProvider is created for options.tracepoints and a
    DebugfsProvider for options.debugfs, in that order. If neither
    option is set, a single TracepointProvider is used.
    """
    selection = ((options.tracepoints, TracepointProvider),
                 (options.debugfs, DebugfsProvider))
    providers = [create() for wanted, create in selection if wanted]

    if not providers:
        providers = [TracepointProvider()]

    return providers
1075
def check_access(options):
    """Exit if the directories needed for gathering data are missing.

    Checks, in order:
    - /sys/kernel/debug exists, otherwise exit with status 1
    - PATH_DEBUGFS_KVM exists, otherwise exit with status 1
    - PATH_DEBUGFS_TRACING exists if tracepoints are going to be used,
      i.e. if they were requested or debugfs was not requested. If it
      is missing and tracepoints were requested explicitly, exit with
      status 1. Otherwise set options.debugfs, print a notice and
      wait five seconds before continuing.

    Returns the (possibly modified) options object.
    """
    if not os.path.exists('/sys/kernel/debug'):
        # Terminate the message with a newline like all other messages,
        # so the shell prompt does not end up on the same line.
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    tracing_wanted = options.tracepoints or not options.debugfs
    if tracing_wanted and not os.path.exists(PATH_DEBUGFS_TRACING):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        # Tracepoints were only the implicit default: use debugfs and
        # leave the messages on screen for a moment.
        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        sleep(5)

    return options
1104
def main():
    """Parse the options, verify access and run the selected mode.

    Exits if a pid greater than zero was given and /proc/<pid> is not
    a directory. Otherwise runs the logging mode for options.log, the
    batch mode for options.once and the text ui in all other cases.
    """
    options = check_access(get_options())

    pid_dir = os.path.join('/proc/', str(options.pid))
    if options.pid > 0 and not os.path.isdir(pid_dir):
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), options.pid, fields=options.fields)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()
1125
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()