tools/runqslower.py: Fix raw tracepoint code
Closes: #2588
It was observed that many unrelated process names
were reported for the same pid.
The cause of the bug was the use of bpf_get_current_comm()
which operates on the current process, which might already be
different from the 'next' task we captured.
The fix is to use bpf_probe_read_str() instead.
Also remove dead code related to tgid extraction, which is not
used at all.
diff --git a/tools/runqslower.py b/tools/runqslower.py
index 5f5c3b9..b678533 100755
--- a/tools/runqslower.py
+++ b/tools/runqslower.py
@@ -28,6 +28,10 @@
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 02-May-2018 Ivan Babrou Created this.
+# 18-Nov-2019 Gergely Bod BUG fix: Use bpf_probe_read_str() to extract the
+# process name from 'task_struct* next' in raw tp code.
+# bpf_get_current_comm() operates on the current task
+# which might already be different than 'next'.
from __future__ import print_function
from bcc import BPF
@@ -163,13 +167,12 @@
// TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
struct task_struct *prev = (struct task_struct *)ctx->args[1];
struct task_struct *next= (struct task_struct *)ctx->args[2];
- u32 pid, tgid;
+ u32 pid;
long state;
// ivcsw: treat like an enqueue event and store timestamp
bpf_probe_read(&state, sizeof(long), (const void *)&prev->state);
if (state == TASK_RUNNING) {
- bpf_probe_read(&tgid, sizeof(prev->tgid), &prev->tgid);
bpf_probe_read(&pid, sizeof(prev->pid), &prev->pid);
if (!(FILTER_PID || pid == 0)) {
u64 ts = bpf_ktime_get_ns();
@@ -177,7 +180,6 @@
}
}
- bpf_probe_read(&tgid, sizeof(next->tgid), &next->tgid);
bpf_probe_read(&pid, sizeof(next->pid), &next->pid);
u64 *tsp, delta_us;
@@ -195,7 +197,7 @@
struct data_t data = {};
data.pid = pid;
data.delta_us = delta_us;
- bpf_get_current_comm(&data.task, sizeof(data.task));
+ bpf_probe_read_str(&data.task, sizeof(data.task), next->comm);
// output
events.perf_submit(ctx, &data, sizeof(data));