#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# runqlen    Summarize scheduler run queue length as a histogram.
#            For Linux, uses BCC, eBPF.
#
# This counts the length of the run queue, excluding the currently running
# thread, and shows it as a histogram.
#
# Also answers run queue occupancy.
#
# USAGE: runqlen [-h] [-T] [-O] [-C] [interval] [count]
#
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
# a version of this tool that may work on Linux 4.6 - 4.8.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 12-Dec-2016   Brendan Gregg   Created this.

from __future__ import print_function

import argparse
from os import open, close, dup, dup2, unlink, O_WRONLY
from tempfile import NamedTemporaryFile
from time import sleep, strftime

from bcc import BPF, PerfType, PerfSWConfig

# arguments
examples = """examples:
    ./runqlen # summarize run queue length as a histogram
    ./runqlen 1 10 # print 1 second summaries, 10 times
    ./runqlen -T 1 # 1s summaries and timestamps
    ./runqlen -O # report run queue occupancy
    ./runqlen -C # show each CPU separately
"""

# The examples above are shown verbatim as the epilog of --help, hence the
# raw-description formatter.
parser = argparse.ArgumentParser(
    description="Summarize scheduler run queue length as a histogram",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

# Boolean switches, registered in the order they appear in --help.
for short_flag, long_flag, help_text in (
        ("-T", "--timestamp", "include timestamp on output"),
        ("-O", "--runqocc", "report run queue occupancy"),
        ("-C", "--cpus", "print output for each CPU separately")):
    parser.add_argument(short_flag, long_flag, action="store_true",
                        help=help_text)

# Optional positionals; both fall back to the same very large default.
for positional, help_text in (
        ("interval", "output interval, in seconds"),
        ("count", "number of outputs")):
    parser.add_argument(positional, nargs="?", default=99999999,
                        help=help_text)

# Hidden switch: its help text is suppressed.
parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS)

args = parser.parse_args()

# Remaining number of summaries to print.
countdown = int(args.count)
# Non-zero makes the script print the generated BPF program text.
debug = 0
# Passed as sample_freq when the perf event is attached.
frequency = 99

# Linux 4.15 introduced a new field runnable_weight
# in linux_src:kernel/sched/sched.h as
#     struct cfs_rq {
#         struct load_weight load;
#         unsigned long runnable_weight;
#         unsigned int nr_running, h_nr_running;
#         ......
#     }
# and this tool needs to access nr_running to get
# run queue length information.
#
# The commit which introduces the cfs_rq->runnable_weight
# field also introduces the field sched_entity->runnable_weight,
# where sched_entity is defined in linux_src:include/linux/sched.h.
#
# To cope with pre-4.15 and 4.15/post-4.15 releases,
# we run a simple BPF program to detect whether the
# field sched_entity->runnable_weight exists. The existence of
# this field should imply the existence of cfs_rq->runnable_weight.
#
# This will need maintenance as the relationship between these
# two fields may change in the future.
#
def check_runnable_weight_field():
    """Report whether ``struct sched_entity`` has a ``runnable_weight`` field.

    A tiny BPF program that reads ``entity->runnable_weight`` is compiled
    while file descriptor 2 points at a throw-away temporary file, so that
    compiler errors from a failed build are not printed on the screen.

    The original stderr is restored and the temporary file removed on every
    exit path, including when an unexpected exception propagates.

    Returns:
        True if the probe program compiled, False if ``BPF()`` raised an
        ``Exception``. ``KeyboardInterrupt``/``SystemExit`` are not treated
        as a compile failure and propagate to the caller.
    """
    # Define the bpf program for checking purpose
    bpf_check_text = """
#include <linux/sched.h>
unsigned long dummy(struct sched_entity *entity)
{
    return entity->runnable_weight;
}
"""

    # Only the name is needed: the file is reopened by descriptor below.
    tmp_file = NamedTemporaryFile(delete=False)
    tmp_file.close()

    try:
        # Keep a handle on the real stderr so it can be put back.
        old_stderr = dup(2)
        try:
            # Point fd 2 at the temporary file. dup2() targets fd 2
            # explicitly instead of relying on open() returning the
            # lowest free descriptor.
            fd = open(tmp_file.name, O_WRONLY)
            try:
                dup2(fd, 2)
            finally:
                close(fd)

            try:
                BPF(text=bpf_check_text)
                success_compile = True
            except Exception:
                success_compile = False
        finally:
            # dup2() (unlike dup() on Python 3) leaves fd 2 inheritable,
            # matching the descriptor the process started with.
            dup2(old_stderr, 2)
            close(old_stderr)
    finally:
        # remove the temporary file
        unlink(tmp_file.name)

    return success_compile


# define BPF program
# STORAGE, STORE and RUNNABLE_WEIGHT_FIELD are placeholders that are replaced
# with real C text below, before the program is compiled.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

// Declare enough of cfs_rq to find nr_running, since we can't #import the
// header. This will need maintenance. It is from kernel/sched/sched.h:
struct cfs_rq_partial {
    struct load_weight load;
    RUNNABLE_WEIGHT_FIELD
    unsigned int nr_running, h_nr_running;
};

typedef struct cpu_key {
    int cpu;
    unsigned int slot;
} cpu_key_t;
STORAGE

int do_perf_event()
{
    unsigned int len = 0;
    pid_t pid = 0;
    struct task_struct *task = NULL;
    struct cfs_rq_partial *my_q = NULL;

    // Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
    // unstable interface and may need maintenance. Perhaps a future version
    // of BPF will support task_rq(p) or something similar as a more reliable
    // interface.
    task = (struct task_struct *)bpf_get_current_task();
    my_q = (struct cfs_rq_partial *)task->se.cfs_rq;
    len = my_q->nr_running;

    // Calculate run queue length by subtracting the currently running task,
    // if present. len 0 == idle, len 1 == one running task.
    if (len > 0)
        len--;

    STORE

    return 0;
}
"""

# code substitutions
# Pick the histogram declaration and the statement that records a sample:
# keyed by (cpu, slot) with -C, keyed by the queue length alone otherwise.
if args.cpus:
    storage_decl = 'BPF_HISTOGRAM(dist, cpu_key_t);'
    store_stmt = ('cpu_key_t key = {.slot = len}; '
                  'key.cpu = bpf_get_smp_processor_id(); '
                  'dist.increment(key);')
else:
    storage_decl = 'BPF_HISTOGRAM(dist, unsigned int);'
    store_stmt = 'dist.atomic_increment(len);'

# Only declare runnable_weight in cfs_rq_partial when the probe says the
# running kernel's headers have that field.
weight_decl = ('unsigned long runnable_weight;'
               if check_runnable_weight_field() else '')

for placeholder, replacement in (('STORAGE', storage_decl),
                                 ('STORE', store_stmt),
                                 ('RUNNABLE_WEIGHT_FIELD', weight_decl)):
    bpf_text = bpf_text.replace(placeholder, replacement)

# Dump the final program text on request; --ebpf stops here without running.
if debug or args.ebpf:
    print(bpf_text)
if args.ebpf:
    exit()

# Parse the interval once, before anything is loaded: a non-numeric value
# then fails here instead of inside the output loop after attaching.
interval = int(args.interval)

# initialize BPF & perf_events
b = BPF(text=bpf_text)
b.attach_perf_event(ev_type=PerfType.SOFTWARE,
    ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
    sample_period=0, sample_freq=frequency)

print("Sampling run queue length... Hit Ctrl-C to end.")

# output
# Test the parsed integer, not args.interval: the raw argument is a string
# (or the non-zero default) and therefore always truthy, which made an
# interval of 0 spin through the loop instead of printing once and exiting.
exiting = 0 if interval else 1
dist = b.get_table("dist")
while True:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        # Ctrl-C: print one last summary, then leave.
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")

    if args.runqocc:
        if args.cpus:
            # run queue occupancy, per-CPU summary
            #
            # The sampling program stays attached while we read, so the
            # table can gain keys between two iterations. Collect the
            # highest CPU number and the counters in a single pass so a
            # newly seen CPU cannot be missing from the dictionaries.
            idle = {}
            queued = {}
            cpumax = 0
            for k, v in dist.items():
                cpu = k.cpu
                if cpu > cpumax:
                    cpumax = cpu
                # slot 0 == nothing waiting behind the running task
                bucket = idle if k.slot == 0 else queued
                bucket[cpu] = bucket.get(cpu, 0) + v.value

            # CPUs without any sample are still listed, at 0%.
            for c in range(cpumax + 1):
                cpu_queued = queued.get(c, 0)
                samples = idle.get(c, 0) + cpu_queued
                if samples:
                    runqocc = float(cpu_queued) / samples
                else:
                    runqocc = 0
                print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))

        else:
            # run queue occupancy, system-wide summary
            idle = 0
            queued = 0
            for k, v in dist.items():
                if k.value == 0:
                    idle += v.value
                else:
                    queued += v.value
            samples = idle + queued
            if samples:
                runqocc = float(queued) / samples
            else:
                runqocc = 0
            print("runqocc: %0.2f%%" % (100 * runqocc))

    else:
        # run queue length histograms
        dist.print_linear_hist("runqlen", "cpu")

    # Start the next interval from an empty table.
    dist.clear()

    countdown -= 1
    if exiting or countdown == 0:
        exit()
255 exit()