tools/offcputime Warn user when an event skipped by negative duration
diff --git a/tools/offcputime.py b/tools/offcputime.py
index c1bf79c..7ba5dc5 100755
--- a/tools/offcputime.py
+++ b/tools/offcputime.py
@@ -13,7 +13,7 @@
from __future__ import print_function
from bcc import BPF
from sys import stderr
-from time import sleep, strftime
+from time import strftime
import argparse
import errno
import signal
@@ -126,6 +126,14 @@
BPF_HASH(start, u32);
BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
+struct warn_event_t {
+ u32 pid;
+ u32 tgid;
+ u32 t_start;
+ u32 t_end;
+};
+BPF_PERF_OUTPUT(warn_events);
+
int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
u32 pid = prev->pid;
u32 tgid = prev->tgid;
@@ -150,6 +158,13 @@
u64 t_end = bpf_ktime_get_ns();
start.delete(&pid);
if (t_start > t_end) {
+ struct warn_event_t event = {
+ .pid = pid,
+ .tgid = tgid,
+ .t_start = t_start,
+ .t_end = t_end,
+ };
+ warn_events.perf_submit(ctx, &event, sizeof(event));
return 0;
}
u64 delta = t_end - t_start;
@@ -251,8 +266,23 @@
else:
print("... Hit Ctrl-C to end.")
+
+def print_warn_event(cpu, data, size):
+ event = b["warn_events"].event(data)
+ # See https://github.com/iovisor/bcc/pull/3227 for those wondering how can this happen.
+ print("WARN: Skipped an event with negative duration: pid:%d, tgid:%d, off-cpu:%d, on-cpu:%d"
+ % (event.pid, event.tgid, event.t_start, event.t_end),
+ file=stderr)
+
+b["warn_events"].open_perf_buffer(print_warn_event)
try:
- sleep(duration)
+ duration_ms = duration * 1000
+ start_time_ms = int(BPF.monotonic_time() / 1000000)
+ while True:
+ elapsed_ms = int(BPF.monotonic_time() / 1000000) - start_time_ms
+ if elapsed_ms >= duration_ms:
+ break
+ b.perf_buffer_poll(timeout=duration_ms - elapsed_ms)
except KeyboardInterrupt:
# as cleanup can take many seconds, trap Ctrl-C:
signal.signal(signal.SIGINT, signal_ignore)