blob: 204ce19e91e7a4de559d8b2933b53758163f1ffa [file] [log] [blame]
Brendan Gregg251823a2016-12-14 12:10:59 -08001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# runqlen Summarize scheduler run queue length as a histogram.
5# For Linux, uses BCC, eBPF.
6#
7# This counts the length of the run queue, excluding the currently running
8# thread, and shows it as a histogram.
9#
10# Also answers run queue occupancy.
11#
# USAGE: runqlen [-h] [-T] [-O] [-C] [interval] [count]
13#
14# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
15# a version of this tool that may work on Linux 4.6 - 4.8.
16#
17# Copyright 2016 Netflix, Inc.
18# Licensed under the Apache License, Version 2.0 (the "License")
19#
20# 12-Dec-2016 Brendan Gregg Created this.
21
22from __future__ import print_function
23from bcc import BPF, PerfType, PerfSWConfig
24from time import sleep, strftime
25import argparse
26
# arguments
#
# Epilog shown verbatim under --help (RawDescriptionHelpFormatter keeps the
# line breaks as written).
examples = (
    "examples:\n"
    " ./runqlen # summarize run queue length as a histogram\n"
    " ./runqlen 1 10 # print 1 second summaries, 10 times\n"
    " ./runqlen -T 1 # 1s summaries and timestamps\n"
    " ./runqlen -O # report run queue occupancy\n"
    " ./runqlen -C # show each CPU separately\n"
)
parser = argparse.ArgumentParser(
    description="Summarize scheduler run queue length as a histogram",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

# on/off switches: (short flag, long flag, help text)
for short_flag, long_flag, flag_help in (
        ("-T", "--timestamp", "include timestamp on output"),
        ("-O", "--runqocc", "report run queue occupancy"),
        ("-C", "--cpus", "print output for each CPU separately")):
    parser.add_argument(short_flag, long_flag, action="store_true",
        help=flag_help)

# optional positionals, both defaulting to a very large number:
# (name, help text)
for positional, positional_help in (
        ("interval", "output interval, in seconds"),
        ("count", "number of outputs")):
    parser.add_argument(positional, nargs="?", default=99999999,
        help=positional_help)

args = parser.parse_args()

# number of summaries still to print; decremented by the output loop
countdown = int(args.count)
# set to non-zero to print the generated BPF program before loading it
debug = 0
# passed as sample_freq when the perf event is attached
frequency = 99
53
# define BPF program
#
# The text below is C that BCC compiles. It contains two placeholder tokens
# that are substituted before compilation: one standing in for the histogram
# map declaration and one for the statement that records each sample. Because
# the substitution is a plain text replace, neither token may appear anywhere
# else in this string (including inside C comments).
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

// Declare enough of cfs_rq to find nr_running, since we can't #include the
// header. This will need maintenance. It is from kernel/sched/sched.h:
struct cfs_rq_partial {
    struct load_weight load;
    unsigned int nr_running, h_nr_running;
};

typedef struct cpu_key {
    int cpu;
    unsigned int slot;
} cpu_key_t;
STORAGE

int do_perf_event()
{
    unsigned int len = 0;
    struct task_struct *task = NULL;
    struct cfs_rq_partial *my_q = NULL;

    // Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
    // unstable interface and may need maintenance. Perhaps a future version
    // of BPF will support task_rq(p) or something similar as a more reliable
    // interface.
    task = (struct task_struct *)bpf_get_current_task();
    bpf_probe_read(&my_q, sizeof(my_q), &task->se.cfs_rq);
    bpf_probe_read(&len, sizeof(len), &my_q->nr_running);

    // Report the queue length excluding the currently running thread: drop
    // the count by one, leaving an already-empty queue at zero. We could do
    // this other ways if needed, like matching on task->pid.
    if (len > 0)
        len--;

    STORE

    return 0;
}
"""
97
# Fill in the two placeholders in the BPF text: the histogram declaration and
# the statement that records one sample.
if args.cpus:
    # Per-CPU mode: the histogram is keyed by (cpu, queue length).
    bpf_text = bpf_text.replace('STORAGE',
        'BPF_HISTOGRAM(dist, cpu_key_t);')
    # Take the CPU from the BPF helper rather than reading task->wake_cpu:
    # that field is scheduler-internal, only exists on SMP kernels, and
    # describes wake-up placement rather than where this sample fired.
    bpf_text = bpf_text.replace('STORE', 'cpu_key_t key = {.slot = len}; ' +
        'key.cpu = bpf_get_smp_processor_id(); ' +
        'dist.increment(key);')
else:
    # System-wide mode: the histogram is keyed by queue length alone.
    bpf_text = bpf_text.replace('STORAGE',
        'BPF_HISTOGRAM(dist, unsigned int);')
    bpf_text = bpf_text.replace('STORE', 'dist.increment(len);')
108
# print the final program text when debugging
if debug:
    print(bpf_text)

# load BPF program (once: constructing BPF a second time would compile and
# load the same text again and discard the first object)
b = BPF(text=bpf_text)

# attach do_perf_event to the software CPU-clock perf event, sampled at
# `frequency` rather than at a fixed period
b.attach_perf_event(ev_type=PerfType.SOFTWARE,
    ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
    sample_period=0, sample_freq=frequency)
120
print("Sampling run queue length... Hit Ctrl-C to end.")

# output
#
# Each iteration sleeps for the interval, prints one summary of the "dist"
# histogram, clears it, and stops once `countdown` summaries have been
# printed or Ctrl-C was hit during the sleep.
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        # still print what was collected so far, then exit below
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")

    if args.runqocc:
        # Read the map a single time. Sampling continues while we print, so
        # reading it twice could show a CPU in the second read that was
        # absent from the first.
        entries = list(dist.items())

        if args.cpus:
            # run queue occupancy, per-CPU summary
            # keys are cpu_key_t: slot 0 means nothing was queued
            idle = {}
            queued = {}
            cpumax = 0
            for k, v in entries:
                cpu = k.cpu
                if cpu > cpumax:
                    cpumax = cpu
                if k.slot == 0:
                    idle[cpu] = idle.get(cpu, 0) + v.value
                else:
                    queued[cpu] = queued.get(cpu, 0) + v.value
            # report every CPU from 0 to the highest one seen; CPUs with no
            # samples are shown as 0%
            for c in range(0, cpumax + 1):
                queued_samples = queued.get(c, 0)
                samples = idle.get(c, 0) + queued_samples
                if samples:
                    runqocc = float(queued_samples) / samples
                else:
                    runqocc = 0
                print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))

        else:
            # run queue occupancy, system-wide summary
            # keys are plain queue lengths: 0 means nothing was queued
            idle = 0
            queued = 0
            for k, v in entries:
                if k.value == 0:
                    idle += v.value
                else:
                    queued += v.value
            samples = idle + queued
            if samples:
                runqocc = float(queued) / samples
            else:
                runqocc = 0
            print("runqocc: %0.2f%%" % (100 * runqocc))

    else:
        # run queue length histograms
        dist.print_linear_hist("runqlen", "cpu")

    # start the next interval from an empty histogram
    dist.clear()

    countdown -= 1
    if exiting or countdown == 0:
        exit()
185 exit()