blob: e0344d12e6addd95203f008d16a55ac5caa27d61 [file] [log] [blame]
Zwb6d147e62019-03-10 01:59:53 +08001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# drsnoop Trace direct reclaim and print details including issuing PID.
5# For Linux, uses BCC, eBPF.
6#
7# This uses in-kernel eBPF maps to cache process details (PID and comm) by
8# direct reclaim begin, as well as a starting timestamp for calculating
9# latency.
10#
Wenbo Zhangc4219702020-03-04 04:31:27 +000011# Copyright (c) 2019 Wenbo Zhang
Zwb6d147e62019-03-10 01:59:53 +080012# Licensed under the Apache License, Version 2.0 (the "License")
13#
Wenbo Zhangc4219702020-03-04 04:31:27 +000014# 20-Feb-2019 Wenbo Zhang Created this.
15# 09-Mar-2019 Wenbo Zhang Updated for show sys mem info.
Zwb6d147e62019-03-10 01:59:53 +080016
17from __future__ import print_function
18from bcc import ArgString, BPF
19import argparse
20from datetime import datetime, timedelta
21import os
22import math
xingfeng25107b2c1142022-03-24 10:51:26 +080023import sys
Zwb6d147e62019-03-10 01:59:53 +080024
25# symbols
26kallsyms = "/proc/kallsyms"
27
28# arguments
29examples = """examples:
30 ./drsnoop # trace all direct reclaim
31 ./drsnoop -T # include timestamps
32 ./drsnoop -U # include UID
33 ./drsnoop -P 181 # only trace PID 181
34 ./drsnoop -t 123 # only trace TID 123
35 ./drsnoop -u 1000 # only trace UID 1000
36 ./drsnoop -d 10 # trace for 10 seconds only
37 ./drsnoop -n main # only print process names containing "main"
38"""
39parser = argparse.ArgumentParser(
40 description="Trace direct reclaim",
41 formatter_class=argparse.RawDescriptionHelpFormatter,
42 epilog=examples)
43parser.add_argument("-T", "--timestamp", action="store_true",
44 help="include timestamp on output")
45parser.add_argument("-U", "--print-uid", action="store_true",
46 help="print UID column")
47parser.add_argument("-p", "--pid",
48 help="trace this PID only")
49parser.add_argument("-t", "--tid",
50 help="trace this TID only")
51parser.add_argument("-u", "--uid",
52 help="trace this UID only")
53parser.add_argument("-d", "--duration",
54 help="total duration of trace in seconds")
55parser.add_argument("-n", "--name",
56 type=ArgString,
57 help="only print process names containing this name")
58parser.add_argument("-v", "--verbose", action="store_true",
59 help="show system memory state")
60parser.add_argument("--ebpf", action="store_true",
61 help=argparse.SUPPRESS)
62args = parser.parse_args()
63debug = 0
64if args.duration:
65 args.duration = timedelta(seconds=int(args.duration))
66
67
68# vm_stat
69vm_stat_addr = ''
70with open(kallsyms) as syms:
71 for line in syms:
72 (addr, size, name) = line.rstrip().split(" ", 2)
73 name = name.split("\t")[0]
74 if name == "vm_stat":
75 vm_stat_addr = "0x" + addr
76 break
77 if name == "vm_zone_stat":
78 vm_stat_addr = "0x" + addr
79 break
80 if vm_stat_addr == '':
81 print("ERROR: no vm_stat or vm_zone_stat in /proc/kallsyms. Exiting.")
82 print("HINT: the kernel should be built with CONFIG_KALLSYMS_ALL.")
83 exit()
84
85NR_FREE_PAGES = 0
86
87PAGE_SIZE = os.sysconf("SC_PAGE_SIZE")
88PAGE_SHIFT = int(math.log(PAGE_SIZE) / math.log(2))
89
90def K(x):
91 return x << (PAGE_SHIFT - 10)
92
93# load BPF program
94bpf_text = """
95#include <uapi/linux/ptrace.h>
96#include <linux/sched.h>
97#include <linux/mmzone.h>
98
99struct val_t {
100 u64 id;
101 u64 ts; // start time
102 char name[TASK_COMM_LEN];
103 u64 vm_stat[NR_VM_ZONE_STAT_ITEMS];
104};
105
106struct data_t {
107 u64 id;
108 u32 uid;
109 u64 nr_reclaimed;
110 u64 delta;
111 u64 ts; // end time
112 char name[TASK_COMM_LEN];
113 u64 vm_stat[NR_VM_ZONE_STAT_ITEMS];
114};
115
116BPF_HASH(start, u64, struct val_t);
117BPF_PERF_OUTPUT(events);
118
119TRACEPOINT_PROBE(vmscan, mm_vmscan_direct_reclaim_begin) {
120 struct val_t val = {};
121 u64 id = bpf_get_current_pid_tgid();
122 u32 pid = id >> 32; // PID is higher part
123 u32 tid = id; // Cast and get the lower part
124 u32 uid = bpf_get_current_uid_gid();
125 u64 ts;
126
127 PID_TID_FILTER
128 UID_FILTER
129 if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
130 val.id = id;
131 val.ts = bpf_ktime_get_ns();
Sumanth Korikkar7f6066d2020-05-20 10:49:56 -0500132 bpf_probe_read_kernel(&val.vm_stat, sizeof(val.vm_stat), (const void *)%s);
Zwb6d147e62019-03-10 01:59:53 +0800133 start.update(&id, &val);
134 }
135 return 0;
136}
137
138TRACEPOINT_PROBE(vmscan, mm_vmscan_direct_reclaim_end) {
139 u64 id = bpf_get_current_pid_tgid();
140 struct val_t *valp;
141 struct data_t data = {};
142 u64 ts = bpf_ktime_get_ns();
143
144 valp = start.lookup(&id);
145 if (valp == NULL) {
146 // missed entry
147 return 0;
148 }
149
150 data.delta = ts - valp->ts;
151 data.ts = ts / 1000;
152 data.id = valp->id;
153 data.uid = bpf_get_current_uid_gid();
Sumanth Korikkar7f6066d2020-05-20 10:49:56 -0500154 bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
155 bpf_probe_read_kernel(&data.vm_stat, sizeof(data.vm_stat), valp->vm_stat);
Zwb6d147e62019-03-10 01:59:53 +0800156 data.nr_reclaimed = args->nr_reclaimed;
157
158 events.perf_submit(args, &data, sizeof(data));
159 start.delete(&id);
160
161 return 0;
162}
163""" % vm_stat_addr
164
165if args.tid: # TID trumps PID
166 bpf_text = bpf_text.replace('PID_TID_FILTER',
167 'if (tid != %s) { return 0; }' % args.tid)
168elif args.pid:
169 bpf_text = bpf_text.replace('PID_TID_FILTER',
170 'if (pid != %s) { return 0; }' % args.pid)
171else:
172 bpf_text = bpf_text.replace('PID_TID_FILTER', '')
173if args.uid:
174 bpf_text = bpf_text.replace('UID_FILTER',
175 'if (uid != %s) { return 0; }' % args.uid)
176else:
177 bpf_text = bpf_text.replace('UID_FILTER', '')
178if debug or args.ebpf:
179 print(bpf_text)
180 if args.ebpf:
181 exit()
182
183# initialize BPF
184b = BPF(text=bpf_text)
185
186initial_ts = 0
187
188# header
189if args.timestamp:
190 print("%-14s" % ("TIME(s)"), end="")
191if args.print_uid:
192 print("%-6s" % ("UID"), end="")
193print("%-14s %-6s %8s %5s" %
194 ("COMM", "TID" if args.tid else "PID", "LAT(ms)", "PAGES"), end="")
195if args.verbose:
196 print("%10s" % ("FREE(KB)"))
197else:
198 print("")
199
200# process event
201def print_event(cpu, data, size):
202 event = b["events"].event(data)
203
204 global initial_ts
205
206 if not initial_ts:
207 initial_ts = event.ts
208
209 if args.name and bytes(args.name) not in event.name:
210 return
211
212 if args.timestamp:
213 delta = event.ts - initial_ts
214 print("%-14.9f" % (float(delta) / 1000000), end="")
215
216 if args.print_uid:
217 print("%-6d" % event.uid, end="")
218
219 print("%-14.14s %-6s %8.2f %5d" %
220 (event.name.decode('utf-8', 'replace'),
221 event.id & 0xffffffff if args.tid else event.id >> 32,
222 float(event.delta) / 1000000, event.nr_reclaimed), end="")
223 if args.verbose:
224 print("%10d" % K(event.vm_stat[NR_FREE_PAGES]))
225 else:
226 print("")
227
xingfeng25107b2c1142022-03-24 10:51:26 +0800228 sys.stdout.flush()
229
Zwb6d147e62019-03-10 01:59:53 +0800230
231# loop with callback to print_event
232b["events"].open_perf_buffer(print_event, page_cnt=64)
233start_time = datetime.now()
234while not args.duration or datetime.now() - start_time < args.duration:
235 try:
236 b.perf_buffer_poll()
237 except KeyboardInterrupt:
238 exit()