blob: 51ddace54965da8960ce4a60302fcf84d105a06f [file] [log] [blame]
Emmanuel Bretellea021fd82016-07-14 13:04:57 -07001#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# cachetop Count cache kernel function calls per process
5# For Linux, uses BCC, eBPF.
6#
7# USAGE: cachetop
8# Taken from cachestat by Brendan Gregg
9#
10# Copyright (c) 2016-present, Facebook, Inc.
11# Licensed under the Apache License, Version 2.0 (the "License")
12#
13# 13-Jul-2016 Emmanuel Bretelle first version
14
15from __future__ import absolute_import
16from __future__ import division
chantrae159f7e2016-07-23 15:33:11 +020017# Do not import unicode_literals until #623 is fixed
18# from __future__ import unicode_literals
Emmanuel Bretellea021fd82016-07-14 13:04:57 -070019from __future__ import print_function
chantrae159f7e2016-07-23 15:33:11 +020020
Emmanuel Bretellea021fd82016-07-14 13:04:57 -070021from bcc import BPF
chantrae159f7e2016-07-23 15:33:11 +020022from collections import defaultdict
23from time import strftime
Emmanuel Bretellea021fd82016-07-14 13:04:57 -070024
25import argparse
26import curses
27import pwd
28import re
29import signal
30from time import sleep
31
# Column titles in display order. A column's position in this tuple is also
# the index used to pick the sort key out of each per-process stat tuple.
FIELDS = (
    "PID", "UID", "CMD", "HITS",
    "MISSES", "DIRTIES", "READ_HIT%", "WRITE_HIT%",
)
# Column the table is sorted on until the user picks another one.
DEFAULT_FIELD = "HITS"
DEFAULT_SORT_FIELD = FIELDS.index(DEFAULT_FIELD)
Emmanuel Bretellea021fd82016-07-14 13:04:57 -070044
45# signal handler
def signal_ignore(signal, frame):
    '''
    Signal handler that swallows the signal.

    It only emits a newline on stdout; both arguments (the signal number
    and the interrupted stack frame) are ignored.
    '''
    print("")
48
49
50# Function to gather data from /proc/meminfo
51# return dictionary for quicker lookup of both values
def get_meminfo(path='/proc/meminfo'):
    '''
    Parse a meminfo-style file into a dictionary.

    Each "Name:   value [unit]" line becomes result["Name"] = int(value);
    the unit suffix, when present, is dropped. Lines that have no colon,
    no value, or a non-integer value are skipped instead of raising.

    path: file to read; defaults to /proc/meminfo.
    Returns a dict mapping field name to its integer value.
    '''
    result = {}

    # `with` guarantees the file is closed even if parsing fails part-way.
    with open(path) as meminfo:
        for line in meminfo:
            name, sep, rest = line.partition(':')
            fields = rest.split()
            if not sep or not fields:
                continue
            try:
                result[name] = int(fields[0])
            except ValueError:
                continue
    return result
60
61
def get_processes_stats(
        bpf,
        sort_field=DEFAULT_SORT_FIELD,
        sort_reverse=False):
    '''
    Read, aggregate and reset the per-process counters of the "counts" table.

    bpf: object exposing get_table("counts") and ksym(address).
    sort_field: index of the tuple element to sort the result on.
    sort_reverse: sort in descending order when true.

    Return a list with one tuple per (pid, uid, comm), laid out like FIELDS:
        (pid, uid, comm, access, misses, mbd, read_hit%, write_hit%)
    pid and uid are ints. The "counts" table is cleared before returning,
    so every call reports only what was counted since the previous call.
    '''
    counts = bpf.get_table("counts")

    # Group the raw counters by process; the inner dict maps the probed
    # instruction pointer to its call count.
    stats = defaultdict(lambda: defaultdict(int))
    for k, v in counts.items():
        proc = (k.pid, k.uid, k.comm.decode('utf-8', 'replace'))
        stats[proc][k.ip] = v.value

    stats_list = []
    # Pre-sorting on the process key gives rows that tie on sort_field a
    # deterministic order, because the final sort below is stable.
    for (pid, uid, comm), count in sorted(
            stats.items(), key=lambda stat: stat[0]):
        mpa = 0   # calls to mark_page_accessed
        mbd = 0   # calls to mark_buffer_dirty
        apcl = 0  # calls to add_to_page_cache_lru
        apd = 0   # calls to account_page_dirtied

        for ip, calls in count.items():
            # Resolve the address once, then match on the symbol prefix.
            sym = bpf.ksym(ip)
            if sym.startswith(b'mark_page_accessed'):
                mpa = max(0, calls)
            elif sym.startswith(b'mark_buffer_dirty'):
                mbd = max(0, calls)
            elif sym.startswith(b'add_to_page_cache_lru'):
                apcl = max(0, calls)
            elif sym.startswith(b'account_page_dirtied'):
                apd = max(0, calls)

        # access = total cache access incl. reads(mpa) and writes(mbd)
        # misses = total of add to lru which we do when we write(mbd)
        # and also the mark the page dirty(same as mbd)
        access = mpa + mbd
        misses = apcl + apd
        total = access + misses

        # rhits is the read hit % during the sample period.
        # whits is the write hit % during the sample period.
        # total is non-zero whenever mpa or apcl is, so no division by zero.
        rhits = 100 * (float(mpa) / total) if mpa > 0 else 0.0
        whits = 100 * (float(apcl) / total) if apcl > 0 else 0.0

        stats_list.append(
            (int(pid), int(uid), comm,
             access, misses, mbd,
             rhits, whits))

    stats_list.sort(key=lambda stat: stat[sort_field], reverse=sort_reverse)
    counts.clear()
    return stats_list
132
133
def _put_line(stdscr, row, text, attr=curses.A_NORMAL):
    '''Write text at the start of row, clipped so it always fits the window.'''
    height, width = stdscr.getmaxyx()
    # Writing outside the window, or into its bottom-right cell, makes curses
    # raise an error, so skip off-screen rows and keep the last column free.
    if 0 <= row < height and width > 1:
        stdscr.addnstr(row, 0, text, width - 1, attr)


def handle_loop(stdscr, args):
    '''
    Run the interactive display until the user quits.

    stdscr: curses window, as passed in by curses.wrapper().
    args: parsed arguments; args.interval is the refresh period in seconds.

    Keys: 'q' quits, 'r' toggles the sort order, '<' and '>' move the sort
    column left and right. Ctrl-C also quits. One key press is read per
    refresh, so a key takes effect at the next redraw.
    '''
    # don't wait on key press
    stdscr.nodelay(1)
    # set default sorting field
    sort_field = DEFAULT_SORT_FIELD
    sort_reverse = False

    # load BPF program
    bpf_text = """

    #include <uapi/linux/ptrace.h>
    struct key_t {
        u64 ip;
        u32 pid;
        u32 uid;
        char comm[16];
    };

    BPF_HASH(counts, struct key_t);

    int do_count(struct pt_regs *ctx) {
        struct key_t key = {};
        u64 pid_tgid = bpf_get_current_pid_tgid();
        u32 uid = bpf_get_current_uid_gid();

        key.ip = PT_REGS_IP(ctx);
        // The upper 32 bits are the tgid (the PID seen from user space);
        // the lower 32 bits are the thread id.
        key.pid = pid_tgid >> 32;
        key.uid = uid & 0xFFFFFFFF;
        bpf_get_current_comm(&(key.comm), 16);

        counts.increment(key);
        return 0;
    }

    """
    b = BPF(text=bpf_text)
    b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count")
    b.attach_kprobe(event="mark_page_accessed", fn_name="do_count")
    b.attach_kprobe(event="account_page_dirtied", fn_name="do_count")
    b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count")

    exiting = False

    while True:
        key = stdscr.getch()
        if key == ord('q'):
            exiting = True
        elif key == ord('r'):
            sort_reverse = not sort_reverse
        elif key == ord('<'):
            sort_field = max(0, sort_field - 1)
        elif key == ord('>'):
            sort_field = min(len(FIELDS) - 1, sort_field + 1)
        try:
            sleep(args.interval)
        except KeyboardInterrupt:
            exiting = True
            # as cleanup can take many seconds, trap Ctrl-C:
            signal.signal(signal.SIGINT, signal_ignore)

        # Get memory info; the values are divided by 1024 for the "MB" labels
        mem = get_meminfo()
        cached = int(mem["Cached"]) / 1024.0
        buff = int(mem["Buffers"]) / 1024.0

        process_stats = get_processes_stats(
            b,
            sort_field=sort_field,
            sort_reverse=sort_reverse)
        stdscr.clear()
        height, _ = stdscr.getmaxyx()

        # status line
        _put_line(
            stdscr, 0,
            "%-8s Buffers MB: %.0f / Cached MB: %.0f "
            "/ Sort: %s / Order: %s" % (
                strftime("%H:%M:%S"), buff, cached, FIELDS[sort_field],
                "descending" if sort_reverse else "ascending"
            )
        )

        # header
        _put_line(
            stdscr, 1,
            "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format(
                *FIELDS
            ),
            curses.A_REVERSE
        )

        # One row per process, limited to the rows left below the header.
        for i, stat in enumerate(process_stats[:max(0, height - 2)]):
            uid = int(stat[1])
            try:
                username = pwd.getpwuid(uid)[0]
            except KeyError:
                # `pwd` throws a KeyError if the user cannot be found. This can
                # happen e.g. when the process is running in a cgroup that has
                # different users from the host.
                username = 'UNKNOWN({})'.format(uid)

            _put_line(
                stdscr, i + 2,
                "{0:8} {username:8.8} {2:16} {3:8} {4:8} "
                "{5:8} {6:9.1f}% {7:9.1f}%".format(
                    *stat, username=username
                )
            )
        stdscr.refresh()
        if exiting:
            print("Detaching...")
            return
245
246
def _non_negative_int(text):
    '''argparse type: convert text to an int and reject negative values.'''
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError("invalid int value: %r" % (text,))
    if value < 0:
        # A negative interval would only fail later, inside sleep().
        raise argparse.ArgumentTypeError(
            "interval must not be negative: %r" % (text,))
    return value


def parse_arguments(argv=None):
    '''
    Parse the command line.

    argv: list of argument strings; when None, argparse reads sys.argv[1:].

    Returns the argparse namespace. Its only attribute is `interval`, an
    optional positional non-negative int that defaults to 5. Invalid input
    makes argparse print an error and exit.
    '''
    parser = argparse.ArgumentParser(
        description='show Linux page cache hit/miss statistics including read '
                    'and write hit % per processes in a UI like top.'
    )
    parser.add_argument(
        'interval', type=_non_negative_int, default=5, nargs='?',
        help='Interval between probes.'
    )

    return parser.parse_args(argv)
259
# Only start the UI when executed as a script, so importing this module does
# not parse the importer's command line or take over the terminal.
if __name__ == "__main__":
    args = parse_arguments()
    # curses.wrapper sets up the screen, calls handle_loop(stdscr, args) and
    # restores the terminal state when it returns or raises.
    curses.wrapper(handle_loop, args)