Brendan Gregg | 5bfadab | 2016-02-10 01:36:51 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
| 4 | # dcstat Directory entry cache (dcache) stats. |
| 5 | # For Linux, uses BCC, eBPF. |
| 6 | # |
| 7 | # USAGE: dcstat [interval [count]] |
| 8 | # |
| 9 | # This uses kernel dynamic tracing of kernel functions, lookup_fast() and |
| 10 | # d_lookup(), which will need to be modified to match kernel changes. See |
| 11 | # code comments. |
| 12 | # |
| 13 | # Copyright 2016 Netflix, Inc. |
| 14 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 15 | # |
| 16 | # 09-Feb-2016 Brendan Gregg Created this. |
| 17 | |
| 18 | from __future__ import print_function |
| 19 | from bcc import BPF |
| 20 | from ctypes import c_int |
| 21 | from time import sleep, strftime |
| 22 | from sys import argv |
| 23 | |
def usage():
    """Print the command-line synopsis for this tool and terminate."""
    synopsis = "USAGE: %s [interval [count]]" % (argv[0],)
    print(synopsis)
    exit()
| 27 | |
# arguments
#
# interval: seconds between output lines (default 1, must be positive).
# count:    number of output lines to print; -1 (the default) means run
#           until interrupted.
interval = 1
count = -1
if len(argv) > 1:
    try:
        interval = int(argv[1])
        # A zero interval would divide by zero when computing rates, and a
        # negative one makes sleep() fail later, so reject both up front.
        if interval <= 0:
            raise ValueError("interval must be a positive integer")
        if len(argv) > 2:
            count = int(argv[2])
    except ValueError:  # also catches -h, --help (not valid integers)
        usage()
| 40 | |
# define BPF program
#
# The counters live in one shared BPF_ARRAY slot per statistic, and the probes
# can fire concurrently on several CPUs. A plain "(*leaf)++" is a non-atomic
# read-modify-write that can lose updates, so the increments use bcc's
# lock_xadd() (an atomic add) instead.
bpf_text = """
#include <uapi/linux/ptrace.h>

enum stats {
    S_REFS = 1,
    S_SLOW,
    S_MISS,
    S_MAXSTAT
};

BPF_ARRAY(stats, u64, S_MAXSTAT);

/*
 * How this is instrumented, and how to interpret the statistics, is very much
 * tied to the current kernel implementation (this was written on Linux 4.4).
 * This will need maintenance to keep working as the implementation changes. To
 * aid future adventurers, this is what the current code does, and why.
 *
 * First problem: the current implementation takes a path and then does a
 * lookup of each component. So how do we count a reference? Once for the path
 * lookup, or once for every component lookup? I've chosen the latter
 * since it seems to map more closely to actual dcache lookups (via
 * __d_lookup_rcu()). It's counted via calls to lookup_fast().
 *
 * The implementation tries different, progressively slower, approaches to
 * lookup a file. At what point do we call it a dcache miss? I've chosen when
 * a d_lookup() (which is called during lookup_slow()) returns zero.
 *
 * I've also included a "SLOW" statistic to show how often the fast lookup
 * failed. Whether this exists or is interesting is an implementation detail,
 * and the "SLOW" statistic may be removed in future versions.
 */
void count_fast(struct pt_regs *ctx) {
    int key = S_REFS;
    u64 *leaf = stats.lookup(&key);
    if (leaf) lock_xadd(leaf, 1);
}

void count_lookup(struct pt_regs *ctx) {
    int key = S_SLOW;
    u64 *leaf = stats.lookup(&key);
    if (leaf) lock_xadd(leaf, 1);
    if (PT_REGS_RC(ctx) == 0) {
        key = S_MISS;
        leaf = stats.lookup(&key);
        if (leaf) lock_xadd(leaf, 1);
    }
}
"""
| 91 | |
# load BPF program
# Compile the embedded program, then hook its two handlers into the kernel:
# count_fast runs on entry to lookup_fast(), and count_lookup runs on return
# from d_lookup() so that it can inspect the return value.
b = BPF(text=bpf_text)
b.attach_kprobe(fn_name="count_fast", event="lookup_fast")
b.attach_kretprobe(fn_name="count_lookup", event="d_lookup")
| 96 | |
# stat column labels and indexes
# Each value is the slot in the BPF "stats" array that holds the counter, and
# mirrors the S_REFS/S_SLOW/S_MISS values of the enum in the BPF program.
stats = dict(REFS=1, SLOW=2, MISS=3)
| 103 | |
# header
# Assemble the whole heading line first, then emit it in one print: the TIME
# column, one "<NAME>/s" column per statistic (ordered by array index), and
# the trailing HIT% column.
header = "%-8s " % "TIME"
for label, _ in sorted(stats.items(), key=lambda k_v: (k_v[1], k_v[0])):
    header += " %8s" % (label + "/s")
header += " %8s" % "HIT%"
print(header)
| 109 | |
# output
#
# Once per interval: print a timestamp, the per-second rate of each counter,
# and the dcache hit percentage, then zero the counters for the next interval.
# Runs for `count` lines when count > 0, otherwise until interrupted.
i = 0
try:
    while True:
        if count > 0:
            i += 1
            if i > count:
                exit()
        sleep(interval)

        # Read every counter exactly once, so the rate columns and HIT% are
        # computed from the same snapshot. A failed read is recorded as None
        # (best effort) rather than aborting the tool.
        snapshot = {}
        for stype, idx in stats.items():
            try:
                snapshot[stype] = b["stats"][c_int(idx)].value
            except Exception:
                snapshot[stype] = None

        print("%-8s: " % strftime("%H:%M:%S"), end="")

        # print each statistic as a column
        for stype, idx in sorted(stats.items(),
                                 key=lambda k_v: (k_v[1], k_v[0])):
            val = snapshot[stype]
            if val is None:
                print(" %8d" % 0, end="")
            else:
                print(" %8d" % (val / interval), end="")

        # print hit ratio percentage; undefined when a counter could not be
        # read or when there were no references in this interval
        ref = snapshot["REFS"]
        miss = snapshot["MISS"]
        if ref is None or miss is None or ref == 0:
            print(" %7s%%" % "-")
        else:
            hit = ref - miss
            pct = float(100) * hit / ref
            print(" %8.2f" % pct)

        b["stats"].clear()
except KeyboardInterrupt:
    # Ctrl-C at any point (sleeping or printing) ends the tool quietly.
    exit()