fix runqlen.py with 4.15 kernel
The following kernel commit changes the layout of struct cfs_rq,
defined in linux_src:kernel/sched/sched.h:
```
commit 1ea6c46a23f1213d1972bfae220db5c165e27bba
Author: Peter Zijlstra <peterz@infradead.org>
Date: Sat May 6 15:59:54 2017 +0200
sched/fair: Propagate an effective runnable_load_avg
The load balancer uses runnable_load_avg as load indicator. For
!cgroup this is:
runnable_load_avg = \Sum se->avg.load_avg ; where se->on_rq
That is, a direct sum of all runnable tasks on that runqueue. As
opposed to load_avg, which is a sum of all tasks on the runqueue,
which includes a blocked component.
...
```
The commit is in the 4.15 kernel release, and it leaves the
cfs_rq_partial structure internal to runqlen.py out of sync with the kernel's.
As a result, runqlen.py will produce incorrect results on 4.15.
This patch attempts to solve this issue by compiling a bpf program
which accesses one of the fields introduced by the above commit.
A successful compilation indicates that we should amend
the cfs_rq_partial structure.
Signed-off-by: Yonghong Song <yhs@fb.com>
diff --git a/tools/runqlen.py b/tools/runqlen.py
index 4a6bc48..e8430ca 100755
--- a/tools/runqlen.py
+++ b/tools/runqlen.py
@@ -22,6 +22,8 @@
from __future__ import print_function
from bcc import BPF, PerfType, PerfSWConfig
from time import sleep, strftime
+from tempfile import NamedTemporaryFile
+from os import open, close, dup, unlink, O_WRONLY
import argparse
# arguments
@@ -51,6 +53,66 @@
debug = 0
frequency = 99
+# Linux 4.15 introduced a new field runnable_weight
+# in linux_src:kernel/sched/sched.h as
+# struct cfs_rq {
+# struct load_weight load;
+# unsigned long runnable_weight;
+# unsigned int nr_running, h_nr_running;
+# ......
+# }
+# and this tool needs to read nr_running to get
+# the run queue length.
+#
+# The commit which introduces cfs_rq->runnable_weight
+# field also introduces the field sched_entity->runnable_weight
+# where sched_entity is defined in linux_src:include/linux/sched.h.
+#
+# To cope with pre-4.15 and 4.15/post-4.15 releases,
+# we run a simple BPF program to detect whether
+# field sched_entity->runnable_weight exists. The existence of
+# this field should imply the existence of cfs_rq->runnable_weight.
+#
+# This will need maintenance as the relationship between these
+# two fields may change in the future.
+#
+def check_runnable_weight_field():
+    """Return True if struct sched_entity has a runnable_weight field.
+
+    A tiny BPF program that reads the field is compiled while stderr
+    (fd 2) points at the null device, so that llvm errors are not printed
+    on the screen. A failed compilation is reported as False.
+    """
+    # Function-scope import: dup2 and devnull are not imported at file level.
+    import os
+
+    # Define the bpf program for checking purpose
+    bpf_check_text = """
+#include <linux/sched.h>
+unsigned long dummy(struct sched_entity *entity)
+{
+    return entity->runnable_weight;
+}
+"""
+
+    # Keep a copy of the real stderr, then point fd 2 at the null device.
+    # dup2 names fd 2 explicitly instead of relying on fd-number reuse.
+    saved_stderr = os.dup(2)
+    devnull_fd = os.open(os.devnull, os.O_WRONLY)
+    os.dup2(devnull_fd, 2)
+    os.close(devnull_fd)
+    try:
+        BPF(text=bpf_check_text)
+        return True
+    except Exception:
+        # The program did not compile, so the field is taken to be absent.
+        return False
+    finally:
+        # Always restore stderr, even on KeyboardInterrupt/SystemExit.
+        os.dup2(saved_stderr, 2)
+        os.close(saved_stderr)
+
+
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
@@ -60,6 +122,7 @@
// header. This will need maintenance. It is from kernel/sched/sched.h:
struct cfs_rq_partial {
struct load_weight load;
+ RUNNABLE_WEIGHT_FIELD
unsigned int nr_running, h_nr_running;
};
@@ -106,6 +169,11 @@
'BPF_HISTOGRAM(dist, unsigned int);')
bpf_text = bpf_text.replace('STORE', 'dist.increment(len);')
+# Declare runnable_weight in cfs_rq_partial only when the kernel has it.
+bpf_text = bpf_text.replace(
+    'RUNNABLE_WEIGHT_FIELD',
+    'unsigned long runnable_weight;' if check_runnable_weight_field() else '')
+
# code substitutions
if debug:
print(bpf_text)