blob: a90b2673b115f8d21fae3f16b326b42d3e6e8295 [file] [log] [blame]
Brendan Gregg60393ea2016-10-04 15:18:11 -07001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcptop Summarize TCP send/recv throughput by host.
5# For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: tcptop [-h] [-C] [-S] [-p PID] [interval [count]]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
12# WARNING: This traces all send/receives at the TCP level, and while it
13# summarizes data in-kernel to reduce overhead, there may still be some
14# overhead at high TCP send/receive rates (eg, ~13% of one CPU at 100k TCP
15# events/sec. This is not the same as packet rate: funccount can be used to
16# count the kprobes below to find out the TCP rate). Test in a lab environment
17# first. If your send/receive rate is low (eg, <1k/sec) then the overhead is
18# expected to be negligible.
19#
20# ToDo: Fit output to screen size (top X only) in default (not -C) mode.
21#
22# Copyright 2016 Netflix, Inc.
23# Licensed under the Apache License, Version 2.0 (the "License")
24#
25# 02-Sep-2016 Brendan Gregg Created this.
26
27from __future__ import print_function
28from bcc import BPF
29import argparse
30from socket import inet_ntop, AF_INET, AF_INET6
31from struct import pack
32from time import sleep, strftime
33from subprocess import call
34import ctypes as ct
Andreas Gerstmayrc64f4872018-07-06 14:59:07 +020035from collections import namedtuple, defaultdict
Brendan Gregg60393ea2016-10-04 15:18:11 -070036
37# arguments
def range_check(string):
    """argparse ``type=`` callback accepting only strictly positive integers.

    Args:
        string: the raw command-line token.

    Returns:
        The token converted with int().

    Raises:
        argparse.ArgumentTypeError: if the converted value is less than 1.
        ValueError: propagated from int() when the token is not an integer.
    """
    value = int(string)
    if value < 1:
        msg = "value must be strictly positive, got %d" % (value,)
        raise argparse.ArgumentTypeError(msg)
    return value
44
# Usage examples, appended verbatim to the --help output via the epilog.
examples = """examples:
 ./tcptop # trace TCP send/recv by host
 ./tcptop -C # don't clear the screen
 ./tcptop -p 181 # only trace PID 181
"""

# Command-line interface: three optional switches, an optional PID, and the
# two optional positionals "interval" and "count" (both validated by
# range_check).  --ebpf is accepted but hidden from the help text.
parser = argparse.ArgumentParser(
    description="Summarize TCP send/recv throughput by host",
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
    "-C", "--noclear",
    action="store_true",
    help="don't clear the screen")
parser.add_argument(
    "-S", "--nosummary",
    action="store_true",
    help="skip system summary line")
parser.add_argument(
    "-p", "--pid",
    help="trace this PID only")
parser.add_argument(
    "interval",
    nargs="?",
    type=range_check,
    default=1,
    help="output interval, in seconds (default 1)")
parser.add_argument(
    "count",
    nargs="?",
    type=range_check,
    default=-1,
    help="number of outputs")
parser.add_argument(
    "--ebpf",
    action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()

# Set to a non-zero value to print the generated BPF program before loading.
debug = 0

# linux stats
loadavg = "/proc/loadavg"
71
# define BPF program
#
# Two kprobes accumulate byte counts into four hash maps (send/recv for IPv4
# and IPv6), keyed by process id plus the local and remote address/port of the
# socket.  The FILTER token is a placeholder that is replaced textually before
# the program is compiled, so it must not appear anywhere else in this string.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

struct ipv4_key_t {
    u32 pid;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_send_bytes, struct ipv4_key_t);
BPF_HASH(ipv4_recv_bytes, struct ipv4_key_t);

struct ipv6_key_t {
    u32 pid;
    // workaround until unsigned __int128 support:
    u64 saddr0;
    u64 saddr1;
    u64 daddr0;
    u64 daddr1;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);

int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
    struct msghdr *msg, size_t size)
{
    // The helper returns tgid << 32 | tid; the upper half is the process id
    // as seen from user space, the lower half is the per-thread id.
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    FILTER
    u16 dport = 0, family = sk->__sk_common.skc_family;

    if (family == AF_INET) {
        struct ipv4_key_t ipv4_key = {.pid = pid};
        ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
        ipv4_key.daddr = sk->__sk_common.skc_daddr;
        ipv4_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv4_key.dport = ntohs(dport);
        ipv4_send_bytes.increment(ipv4_key, size);

    } else if (family == AF_INET6) {
        struct ipv6_key_t ipv6_key = {.pid = pid};

        ipv6_key.saddr0 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0];
        ipv6_key.saddr1 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2];
        ipv6_key.daddr0 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0];
        ipv6_key.daddr1 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2];
        ipv6_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv6_key.dport = ntohs(dport);
        ipv6_send_bytes.increment(ipv6_key, size);

    }
    // else drop

    return 0;
}

/*
 * tcp_recvmsg() would be obvious to trace, but is less suitable because:
 * - we'd need to trace both entry and return, to have both sock and size
 * - misses tcp_read_sock() traffic
 * we'd much prefer tracepoints once they are available.
 */
int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
{
    // Upper 32 bits: process id (tgid); see kprobe__tcp_sendmsg.
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    FILTER
    u16 dport = 0, family = sk->__sk_common.skc_family;

    // Nothing was copied to the reader: do not create or touch a map entry.
    if (copied <= 0)
        return 0;

    if (family == AF_INET) {
        struct ipv4_key_t ipv4_key = {.pid = pid};
        ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
        ipv4_key.daddr = sk->__sk_common.skc_daddr;
        ipv4_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv4_key.dport = ntohs(dport);
        ipv4_recv_bytes.increment(ipv4_key, copied);

    } else if (family == AF_INET6) {
        struct ipv6_key_t ipv6_key = {.pid = pid};
        ipv6_key.saddr0 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0];
        ipv6_key.saddr1 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2];
        ipv6_key.daddr0 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0];
        ipv6_key.daddr1 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2];
        ipv6_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv6_key.dport = ntohs(dport);
        ipv6_recv_bytes.increment(ipv6_key, copied);
    }
    // else drop

    return 0;
}
"""
177
# code substitutions
if args.pid:
    # The PID is spliced into the C source below, so insist on a plain
    # integer and report anything else as a usage error instead of handing
    # arbitrary text to the BPF compiler.
    try:
        pid_filter = int(args.pid)
    except ValueError:
        parser.error("argument -p/--pid: invalid PID %r" % (args.pid,))
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %d) { return 0; }' % pid_filter)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug or args.ebpf:
    print(bpf_text)
    # --ebpf only dumps the generated program; nothing is loaded.
    if args.ebpf:
        exit()
Brendan Gregg60393ea2016-10-04 15:18:11 -0700188
# Hashable record identifying one TCP session of one process: the owning pid
# plus the textual local and remote address and their port numbers.
TCPSessionKey = namedtuple('TCPSession', 'pid laddr lport daddr dport')
190
def pid_to_comm(pid):
    """Return the command name of process `pid`.

    The name is read from /proc/<pid>/comm with trailing whitespace removed.
    If that file cannot be opened or read (IOError), the PID itself is
    returned as a string so callers always get something printable.
    """
    try:
        # "with" closes the file promptly; this runs once per output row.
        with open("/proc/%d/comm" % pid, "r") as comm_file:
            return comm_file.read().rstrip()
    except IOError:
        return str(pid)
197
def get_ipv4_session_key(k):
    """Build a TCPSessionKey from a raw IPv4 BPF map key.

    `k` must expose pid, saddr, daddr, lport and dport.  Each address is
    packed with struct format "I" and converted to text by inet_ntop; pid and
    the two ports are copied through unchanged.
    """
    local_text, remote_text = [inet_ntop(AF_INET, pack("I", raw))
                               for raw in (k.saddr, k.daddr)]
    return TCPSessionKey(k.pid, local_text, k.lport, remote_text, k.dport)
204
def get_ipv6_session_key(k):
    """Build a TCPSessionKey from a raw IPv6 BPF map key.

    `k` must expose pid, saddr0/saddr1, daddr0/daddr1, lport and dport.  Each
    address arrives as two integers that are packed together with struct
    format "QQ" and converted to text by inet_ntop; pid and the two ports are
    copied through unchanged.
    """
    local_text, remote_text = [inet_ntop(AF_INET6, pack("QQ", first, second))
                               for first, second in ((k.saddr0, k.saddr1),
                                                     (k.daddr0, k.daddr1))]
    return TCPSessionKey(k.pid, local_text, k.lport, remote_text, k.dport)
211
# initialize BPF
b = BPF(text=bpf_text)

ipv4_send_bytes = b["ipv4_send_bytes"]
ipv4_recv_bytes = b["ipv4_recv_bytes"]
ipv6_send_bytes = b["ipv6_send_bytes"]
ipv6_recv_bytes = b["ipv6_recv_bytes"]

print('Tracing... Output every %s secs. Hit Ctrl-C to end' % args.interval)


def _drain_throughput(send_map, recv_map, to_session_key):
    """Merge one send map and one recv map into {session: [sent, received]}.

    Each map is read in full and then cleared, send map first, so the next
    interval starts counting from zero.
    """
    totals = defaultdict(lambda: [0, 0])
    for slot, table in enumerate((send_map, recv_map)):
        for raw_key, counter in table.items():
            totals[to_session_key(raw_key)][slot] = counter.value
        table.clear()
    return totals


def _print_throughput(totals, header_fmt, row_fmt, laddr_title, raddr_title):
    """Print a header (only if there is data) and one row per session,
    busiest session (sent + received bytes) first."""
    if totals:
        print(header_fmt % ("PID", "COMM",
            laddr_title, raddr_title, "RX_KB", "TX_KB"))

    ranked = sorted(totals.items(),
                    key=lambda kv: sum(kv[1]),
                    reverse=True)
    for session, (sent, received) in ranked:
        print(row_fmt % (session.pid,
            pid_to_comm(session.pid),
            session.laddr + ":" + str(session.lport),
            session.daddr + ":" + str(session.dport),
            int(received / 1024), int(sent / 1024)))


# output
i = 0
exiting = False
while not exiting and i != args.count:
    try:
        sleep(args.interval)
    except KeyboardInterrupt:
        # Still print the data gathered so far, then leave the loop.
        exiting = True

    # header
    if not args.noclear:
        call("clear")
    else:
        print()
    if not args.nosummary:
        with open(loadavg) as stats:
            print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))

    # IPv4
    ipv4_throughput = _drain_throughput(ipv4_send_bytes, ipv4_recv_bytes,
                                        get_ipv4_session_key)
    _print_throughput(ipv4_throughput,
                      "%-6s %-12s %-21s %-21s %6s %6s",
                      "%-6d %-12.12s %-21s %-21s %6d %6d",
                      "LADDR", "RADDR")

    # IPv6 (rows are wider than 80 chars, sadly)
    ipv6_throughput = _drain_throughput(ipv6_send_bytes, ipv6_recv_bytes,
                                        get_ipv6_session_key)
    _print_throughput(ipv6_throughput,
                      "\n%-6s %-12s %-32s %-32s %6s %6s",
                      "%-6d %-12.12s %-32s %-32s %6d %6d",
                      "LADDR6", "RADDR6")

    i += 1