#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcptop    Summarize TCP send/recv throughput by host.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcptop [-h] [-C] [-S] [-p PID] [interval [count]]
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# WARNING: This traces all send/receives at the TCP level, and while it
# summarizes data in-kernel to reduce overhead, there may still be some
# overhead at high TCP send/receive rates (eg, ~13% of one CPU at 100k TCP
# events/sec. This is not the same as packet rate: funccount can be used to
# count the kprobes below to find out the TCP rate). Test in a lab environment
# first. If your send/receive rate is low (eg, <1k/sec) then the overhead is
# expected to be negligible.
#
# ToDo: Fit output to screen size (top X only) in default (not -C) mode.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 02-Sep-2016   Brendan Gregg   Created this.

27from __future__ import print_function
28from bcc import BPF
29import argparse
30from socket import inet_ntop, AF_INET, AF_INET6
31from struct import pack
32from time import sleep, strftime
33from subprocess import call
34import ctypes as ct
35
# arguments
def range_check(string):
    """argparse ``type`` callback that accepts only strictly positive integers.

    Args:
        string: the raw command-line token.

    Returns:
        The token converted with int().

    Raises:
        argparse.ArgumentTypeError: if the converted value is less than 1.
        ValueError: propagated from int() when the token is not an integer.
    """
    value = int(string)
    if value < 1:
        msg = "value must be strictly positive, got %d" % (value,)
        raise argparse.ArgumentTypeError(msg)
    return value
43
# usage examples, printed verbatim after the option list in --help
examples = """examples:
    ./tcptop           # trace TCP send/recv by host
    ./tcptop -C        # don't clear the screen
    ./tcptop -p 181    # only trace PID 181
"""
parser = argparse.ArgumentParser(
    description="Summarize TCP send/recv throughput by host",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
parser.add_argument("-S", "--nosummary", action="store_true",
    help="skip system summary line")
# The PID is pasted textually into the BPF C source further down, so anything
# that is not a positive integer is rejected here with a normal usage error
# instead of surfacing later as a BPF compiler failure.
parser.add_argument("-p", "--pid", type=range_check,
    help="trace this PID only")
parser.add_argument("interval", nargs="?", default=1, type=range_check,
    help="output interval, in seconds (default 1)")
# The default of -1 is never reached by the output loop's counter, so without
# an explicit count the tool runs until interrupted.
parser.add_argument("count", nargs="?", default=-1, type=range_check,
    help="number of outputs")
# hidden option: print the generated BPF program and exit
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# set to a true value to print the generated BPF program before loading it
debug = 0

# linux stats
loadavg = "/proc/loadavg"
# define BPF program
#
# The C source below is compiled by BCC. The bare FILTER token in each probe
# is a placeholder that is replaced textually before compilation, so the word
# must not appear anywhere else inside this string.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

struct ipv4_key_t {
    u32 pid;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_send_bytes, struct ipv4_key_t);
BPF_HASH(ipv4_recv_bytes, struct ipv4_key_t);

struct ipv6_key_t {
    u32 pid;
    // workaround until unsigned __int128 support:
    u64 saddr0;
    u64 saddr1;
    u64 daddr0;
    u64 daddr1;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);

int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
    struct msghdr *msg, size_t size)
{
    // The helper returns tgid << 32 | tid. The process ID that users know
    // is the tgid in the upper half; the lower half is the thread ID, which
    // differs from the PID for every non-main thread.
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    FILTER
    u16 dport = 0, family = sk->__sk_common.skc_family;
    u64 *val, zero = 0;

    if (family == AF_INET) {
        struct ipv4_key_t ipv4_key = {.pid = pid};
        ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
        ipv4_key.daddr = sk->__sk_common.skc_daddr;
        ipv4_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv4_key.dport = ntohs(dport);
        val = ipv4_send_bytes.lookup_or_init(&ipv4_key, &zero);
        (*val) += size;

    } else if (family == AF_INET6) {
        struct ipv6_key_t ipv6_key = {.pid = pid};

        ipv6_key.saddr0 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0];
        ipv6_key.saddr1 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2];
        ipv6_key.daddr0 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0];
        ipv6_key.daddr1 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2];
        ipv6_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv6_key.dport = ntohs(dport);
        val = ipv6_send_bytes.lookup_or_init(&ipv6_key, &zero);
        (*val) += size;
    }
    // else drop

    return 0;
}

/*
 * tcp_recvmsg() would be obvious to trace, but is less suitable because:
 * - we'd need to trace both entry and return, to have both sock and size
 * - misses tcp_read_sock() traffic
 * we'd much prefer tracepoints once they are available.
 */
int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
{
    // upper 32 bits = process ID (tgid); see kprobe__tcp_sendmsg
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    FILTER
    u16 dport = 0, family = sk->__sk_common.skc_family;
    u64 *val, zero = 0;

    // nothing was copied (or an error code was passed): nothing to account
    if (copied <= 0)
        return 0;

    if (family == AF_INET) {
        struct ipv4_key_t ipv4_key = {.pid = pid};
        ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
        ipv4_key.daddr = sk->__sk_common.skc_daddr;
        ipv4_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv4_key.dport = ntohs(dport);
        val = ipv4_recv_bytes.lookup_or_init(&ipv4_key, &zero);
        (*val) += copied;

    } else if (family == AF_INET6) {
        struct ipv6_key_t ipv6_key = {.pid = pid};
        ipv6_key.saddr0 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0];
        ipv6_key.saddr1 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2];
        ipv6_key.daddr0 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0];
        ipv6_key.daddr1 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2];
        ipv6_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv6_key.dport = ntohs(dport);
        val = ipv6_recv_bytes.lookup_or_init(&ipv6_key, &zero);
        (*val) += copied;
    }
    // else drop

    return 0;
}
"""
179
# code substitutions
# With -p the FILTER placeholder turns into an early return for every other
# PID; without it the placeholder is simply removed.
bpf_text = bpf_text.replace(
    'FILTER',
    ('if (pid != %s) { return 0; }' % args.pid) if args.pid else '')
# --ebpf prints the generated program and stops; the debug flag prints it and
# carries on.
if args.ebpf:
    print(bpf_text)
    exit()
if debug:
    print(bpf_text)
Brendan Gregg60393ea2016-10-04 15:18:11 -0700190
def pid_to_comm(pid):
    """Return the command name for ``pid`` as read from /proc/<pid>/comm.

    Args:
        pid: integer process ID.

    Returns:
        The contents of the comm file with trailing whitespace stripped, or
        the PID rendered as a string when the file cannot be opened or read
        (IOError), for example when no such /proc entry exists.
    """
    try:
        # the with-block closes the file right away instead of leaving the
        # descriptor to the garbage collector
        with open("/proc/%d/comm" % pid, "r") as comm_file:
            return comm_file.read().rstrip()
    except IOError:
        return str(pid)
197
# initialize BPF
b = BPF(text=bpf_text)

# handles to the four byte-counter hash tables declared in the BPF program
ipv4_send_bytes, ipv4_recv_bytes = b["ipv4_send_bytes"], b["ipv4_recv_bytes"]
ipv6_send_bytes, ipv6_recv_bytes = b["ipv6_send_bytes"], b["ipv6_recv_bytes"]

print('Tracing... Output every %s secs. Hit Ctrl-C to end' % args.interval)
207
# output

# field names of struct ipv4_key_t / ipv6_key_t, in declaration order
_IPV4_KEY_FIELDS = ("pid", "saddr", "daddr", "lport", "dport")
_IPV6_KEY_FIELDS = ("pid", "saddr0", "saddr1", "daddr0", "daddr1",
    "lport", "dport")

def _flow_totals(send_table, recv_table, key_fields):
    """Merge the send and receive counter tables into one plain dict.

    Args:
        send_table: mapping of key struct -> counter (with a .value) for
            bytes sent.
        recv_table: same, for bytes received.
        key_fields: names of the key struct fields that identify a flow.

    Returns:
        dict mapping a tuple of the key's field values (ordered as in
        key_fields) to a two-item list [recv_bytes, send_bytes]; a direction
        with no entry stays 0. Neither input table is modified.
    """
    totals = {}
    for column, table in ((0, recv_table), (1, send_table)):
        for k, v in table.items():
            flow = tuple(getattr(k, name) for name in key_fields)
            totals.setdefault(flow, [0, 0])[column] = v.value
    return totals

def _by_traffic(totals):
    """Return (flow, [recv, send]) pairs, largest recv + send first."""
    return sorted(totals.items(), key=lambda kv: sum(kv[1]), reverse=True)

i = 0
exiting = False
while i != args.count and not exiting:
    try:
        sleep(args.interval)
    except KeyboardInterrupt:
        # still print what was collected so far, then leave the loop
        exiting = True

    # header
    if args.noclear:
        print()
    else:
        call("clear")
    if not args.nosummary:
        with open(loadavg) as stats:
            print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))

    # IPv4: snapshot both directions into a local dict and reset the kernel
    # tables straight away. The local dict keeps send-only flows from being
    # written into the receive table (which made their TX bytes show up as
    # RX), and the early reset keeps the window in which new counts could be
    # discarded short.
    ipv4_flows = _flow_totals(ipv4_send_bytes, ipv4_recv_bytes,
        _IPV4_KEY_FIELDS)
    ipv4_send_bytes.clear()
    ipv4_recv_bytes.clear()

    if ipv4_flows:
        print("%-6s %-12s %-21s %-21s %6s %6s" % ("PID", "COMM",
            "LADDR", "RADDR", "RX_KB", "TX_KB"))

    # output
    for flow, (recv_bytes, send_bytes) in _by_traffic(ipv4_flows):
        pid, saddr, daddr, lport, dport = flow
        print("%-6d %-12.12s %-21s %-21s %6d %6d" % (pid,
            pid_to_comm(pid),
            inet_ntop(AF_INET, pack("I", saddr)) + ":" + str(lport),
            inet_ntop(AF_INET, pack("I", daddr)) + ":" + str(dport),
            recv_bytes // 1024, send_bytes // 1024))

    # IPv6: same procedure; each address arrives as two 64-bit halves
    ipv6_flows = _flow_totals(ipv6_send_bytes, ipv6_recv_bytes,
        _IPV6_KEY_FIELDS)
    ipv6_send_bytes.clear()
    ipv6_recv_bytes.clear()

    if ipv6_flows:
        # more than 80 chars, sadly.
        print("\n%-6s %-12s %-32s %-32s %6s %6s" % ("PID", "COMM",
            "LADDR6", "RADDR6", "RX_KB", "TX_KB"))

    # output
    for flow, (recv_bytes, send_bytes) in _by_traffic(ipv6_flows):
        pid, saddr0, saddr1, daddr0, daddr1, lport, dport = flow
        print("%-6d %-12.12s %-32s %-32s %6d %6d" % (pid,
            pid_to_comm(pid),
            inet_ntop(AF_INET6, pack("QQ", saddr0, saddr1)) + ":" +
            str(lport),
            inet_ntop(AF_INET6, pack("QQ", daddr0, daddr1)) + ":" +
            str(dport),
            recv_bytes // 1024, send_bytes // 1024))

    i += 1