blob: e4ea92224e769279021338bf78bbe92d190bef7f [file] [log] [blame]
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# drsnoop  Trace direct reclaim and print details including issuing PID.
#          For Linux, uses BCC, eBPF.
#
# This uses in-kernel eBPF maps to cache process details (PID and comm) when
# direct reclaim begins, along with a starting timestamp for calculating
# latency.
#
# Copyright (c) 2019 Wenbo Zhang
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Feb-2019   Wenbo Zhang   Created this.
# 09-Mar-2019   Wenbo Zhang   Updated to show sys mem info.

17from __future__ import print_function
18from bcc import ArgString, BPF
19import argparse
20from datetime import datetime, timedelta
21import os
22import math
23
# symbols
kallsyms = "/proc/kallsyms"

# arguments
# NOTE: the PID example uses lowercase -p, matching the option defined below.
examples = """examples:
    ./drsnoop           # trace all direct reclaim
    ./drsnoop -T        # include timestamps
    ./drsnoop -U        # include UID
    ./drsnoop -p 181    # only trace PID 181
    ./drsnoop -t 123    # only trace TID 123
    ./drsnoop -u 1000   # only trace UID 1000
    ./drsnoop -d 10     # trace for 10 seconds only
    ./drsnoop -n main   # only print process names containing "main"
"""
parser = argparse.ArgumentParser(
    description="Trace direct reclaim",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-U", "--print-uid", action="store_true",
    help="print UID column")
# -p/-t/-u stay as strings: they are substituted verbatim into the BPF
# program text, and a string "0" must remain truthy for the filter checks.
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("-t", "--tid",
    help="trace this TID only")
parser.add_argument("-u", "--uid",
    help="trace this UID only")
# type=int lets argparse reject a non-numeric duration with a usage error
# instead of an uncaught ValueError traceback.
parser.add_argument("-d", "--duration", type=int,
    help="total duration of trace in seconds")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print process names containing this name")
parser.add_argument("-v", "--verbose", action="store_true",
    help="show system memory state")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
debug = 0
# From here on args.duration is either None (no limit) or a timedelta.
if args.duration is not None:
    args.duration = timedelta(seconds=args.duration)
65
66
# vm_stat
# Scan the kernel symbol table for the symbol named "vm_stat" or
# "vm_zone_stat" (whichever appears first) and remember its address as a hex
# string; it is later substituted into the BPF program text.
vm_stat_addr = ''
with open(kallsyms) as syms:
    for line in syms:
        # Each line splits into three space-separated fields; only the first
        # (address) and last (name) are used.  The name may be followed by a
        # tab-separated suffix, which is discarded.
        (addr, _sym_type, name) = line.rstrip().split(" ", 2)
        name = name.split("\t")[0]
        if name in ("vm_stat", "vm_zone_stat"):
            vm_stat_addr = "0x" + addr
            break
if vm_stat_addr == '':
    print("ERROR: no vm_stat or vm_zone_stat in /proc/kallsyms. Exiting.")
    print("HINT: the kernel should be built with CONFIG_KALLSYMS_ALL.")
    # Non-zero status so callers and scripts can detect the failure.
    exit(1)
83
# Index into the vm_stat snapshot of the counter reported as FREE(KB).
NR_FREE_PAGES = 0

PAGE_SIZE = os.sysconf("SC_PAGE_SIZE")
# Exact integer log2 of the page size.  bit_length() avoids the float
# rounding risk of truncating math.log(PAGE_SIZE) / math.log(2).
PAGE_SHIFT = PAGE_SIZE.bit_length() - 1


def K(x):
    """Convert a page count to kilobytes (KB of 1024 bytes).

    x is shifted left by (PAGE_SHIFT - 10), i.e. multiplied by
    PAGE_SIZE / 1024.
    """
    return x << (PAGE_SHIFT - 10)
91
# load BPF program
#
# The C source below is a template:
#   * the single %s is filled with vm_stat_addr by the % operator at the end
#     of this statement, so the program text must not contain any other
#     percent sign;
#   * PID_TID_FILTER and UID_FILTER are placeholders that are replaced further
#     down in this script with either an early-return check or nothing.
#
# mm_vmscan_direct_reclaim_begin stores, keyed by the pid_tgid value, the task
# comm, a start timestamp and a copy of the memory read from the vm_stat
# address.  mm_vmscan_direct_reclaim_end looks that entry up, computes the
# elapsed time (delta = end - start), reports the end timestamp divided by
# 1000, copies the cached comm and vm_stat snapshot, adds nr_reclaimed from
# the tracepoint arguments, submits the record on the "events" perf buffer
# and deletes the cached entry.  An end event with no cached entry is dropped.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/mmzone.h>

struct val_t {
    u64 id;
    u64 ts; // start time
    char name[TASK_COMM_LEN];
    u64 vm_stat[NR_VM_ZONE_STAT_ITEMS];
};

struct data_t {
    u64 id;
    u32 uid;
    u64 nr_reclaimed;
    u64 delta;
    u64 ts; // end time
    char name[TASK_COMM_LEN];
    u64 vm_stat[NR_VM_ZONE_STAT_ITEMS];
};

BPF_HASH(start, u64, struct val_t);
BPF_PERF_OUTPUT(events);

TRACEPOINT_PROBE(vmscan, mm_vmscan_direct_reclaim_begin) {
    struct val_t val = {};
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id; // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();
    u64 ts;

    PID_TID_FILTER
    UID_FILTER
    if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
        val.id = id;
        val.ts = bpf_ktime_get_ns();
        bpf_probe_read_kernel(&val.vm_stat, sizeof(val.vm_stat), (const void *)%s);
        start.update(&id, &val);
    }
    return 0;
}

TRACEPOINT_PROBE(vmscan, mm_vmscan_direct_reclaim_end) {
    u64 id = bpf_get_current_pid_tgid();
    struct val_t *valp;
    struct data_t data = {};
    u64 ts = bpf_ktime_get_ns();

    valp = start.lookup(&id);
    if (valp == NULL) {
        // missed entry
        return 0;
    }

    data.delta = ts - valp->ts;
    data.ts = ts / 1000;
    data.id = valp->id;
    data.uid = bpf_get_current_uid_gid();
    bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
    bpf_probe_read_kernel(&data.vm_stat, sizeof(data.vm_stat), valp->vm_stat);
    data.nr_reclaimed = args->nr_reclaimed;

    events.perf_submit(args, &data, sizeof(data));
    start.delete(&id);

    return 0;
}
""" % vm_stat_addr
163
# Work out the C snippets that stand in for the two placeholders in the BPF
# program text.  A TID filter takes precedence over a PID filter.
if args.tid:
    pid_tid_filter = 'if (tid != %s) { return 0; }' % args.tid
elif args.pid:
    pid_tid_filter = 'if (pid != %s) { return 0; }' % args.pid
else:
    pid_tid_filter = ''
uid_filter = ('if (uid != %s) { return 0; }' % args.uid) if args.uid else ''

bpf_text = bpf_text.replace('PID_TID_FILTER', pid_tid_filter)
bpf_text = bpf_text.replace('UID_FILTER', uid_filter)

# Dump the generated program when debugging; --ebpf prints it and stops.
if debug or args.ebpf:
    print(bpf_text)
if args.ebpf:
    exit()
181
# initialize BPF
b = BPF(text=bpf_text)

# Timestamp of the first event seen; print_event uses it as time zero.
initial_ts = 0

# header: optional columns first, then the fixed columns, then the optional
# free-memory column; emitted as one line.
header_parts = []
if args.timestamp:
    header_parts.append("%-14s" % ("TIME(s)"))
if args.print_uid:
    header_parts.append("%-6s" % ("UID"))
id_label = "TID" if args.tid else "PID"
header_parts.append("%-14s %-6s %8s %5s" %
                    ("COMM", id_label, "LAT(ms)", "PAGES"))
if args.verbose:
    header_parts.append("%10s" % ("FREE(KB)"))
print("".join(header_parts))
198
199# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: format and print one direct reclaim record.

    cpu and size are supplied by the perf buffer machinery and are unused;
    data is the raw record, decoded through the "events" table.

    The first record received sets the global initial_ts, even if that record
    is then skipped by the -n name filter.
    """
    global initial_ts

    event = b["events"].event(data)

    if not initial_ts:
        initial_ts = event.ts

    # -n: skip records whose comm does not contain the requested name
    if args.name and bytes(args.name) not in event.name:
        return

    columns = []

    if args.timestamp:
        elapsed = event.ts - initial_ts
        columns.append("%-14.9f" % (float(elapsed) / 1000000))

    if args.print_uid:
        columns.append("%-6d" % event.uid)

    # event.id packs two 32-bit ids: the upper half is shown as PID, the
    # lower half as TID when filtering by TID.
    if args.tid:
        task_id = event.id & 0xffffffff
    else:
        task_id = event.id >> 32
    comm = event.name.decode('utf-8', 'replace')
    latency_ms = float(event.delta) / 1000000
    columns.append("%-14.14s %-6s %8.2f %5d" %
                   (comm, task_id, latency_ms, event.nr_reclaimed))

    if args.verbose:
        columns.append("%10d" % K(event.vm_stat[NR_FREE_PAGES]))

    print("".join(columns))
226
227
# loop with callback to print_event
#
# perf_buffer_poll() without a timeout blocks until an event arrives, so on a
# system with no direct reclaim activity a -d deadline would never be
# re-checked.  When a duration is set, poll with a bounded timeout instead.
b["events"].open_perf_buffer(print_event, page_cnt=64)
start_time = datetime.now()
while True:
    if args.duration:
        remaining = args.duration - (datetime.now() - start_time)
        if remaining <= timedelta(0):
            break
        # Wake at least once a second, and never sleep past the deadline.
        remaining_ms = int(remaining.total_seconds() * 1000)
        poll_timeout_ms = max(1, min(1000, remaining_ms))
    else:
        # No deadline: block until the next event, as before.
        poll_timeout_ms = -1
    try:
        b.perf_buffer_poll(timeout=poll_timeout_ms)
    except KeyboardInterrupt:
        exit()
235 exit()