blob: 490e56860e29466edb2640b294a8cdd8e111a625 [file] [log] [blame]
Brendan Gregg797c3ec2016-10-19 18:55:10 -07001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcplife Trace the lifespan of TCP sessions and summarize.
5# For Linux, uses BCC, BPF. Embedded C.
6#
# USAGE: tcplife [-h] [-T] [-t] [-w] [-s] [-p PID] [-L LOCALPORT] [-D REMOTEPORT]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
12# While throughput counters are emitted, they are fetched in a low-overhead
13# manner: reading members of the tcp_info struct on TCP close. ie, we do not
14# trace send/receive.
15#
16# Copyright 2016 Netflix, Inc.
17# Licensed under the Apache License, Version 2.0 (the "License")
18#
19# IDEA: Julia Evans
20#
21# 18-Oct-2016 Brendan Gregg Created this.
22
23from __future__ import print_function
24from bcc import BPF
25import argparse
26from socket import inet_ntop, ntohs, AF_INET, AF_INET6
27from struct import pack
28import ctypes as ct
29from time import strftime
30
# arguments
# The epilog text is printed verbatim by --help (RawDescriptionHelpFormatter),
# so it must describe the options exactly as they are defined below.
examples = """examples:
    ./tcplife           # trace all TCP connect()s
    ./tcplife -T        # include time column (HH:MM:SS)
    ./tcplife -w        # wider columns (fit IPv6)
    ./tcplife -stT      # csv output, with times & timestamps
    ./tcplife -p 181    # only trace PID 181
    ./tcplife -L 80     # only trace local port 80
    ./tcplife -L 80,81  # only trace local ports 80 and 81
    ./tcplife -D 80     # only trace remote port 80
"""
parser = argparse.ArgumentParser(
    description="Trace the lifespan of TCP sessions and summarize",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# -T is the wall-clock column, -t the seconds-since-first-event column.
parser.add_argument("-T", "--time", action="store_true",
    help="include time column on output (HH:MM:SS)")
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output (seconds)")
parser.add_argument("-w", "--wide", action="store_true",
    help="wide column output (fits IPv6 addresses)")
parser.add_argument("-s", "--csv", action="store_true",
    help="comma separated values output")
parser.add_argument("-p", "--pid",
    help="trace this PID only")
# Port options take a comma-separated list; they are split and converted to
# integers where the BPF filter text is generated.
parser.add_argument("-L", "--localport",
    help="comma-separated list of local ports to trace.")
parser.add_argument("-D", "--remoteport",
    help="comma-separated list of remote ports to trace.")
args = parser.parse_args()
# Set to a true value to print the generated BPF program before loading it.
debug = 0
62
# define BPF program
#
# The C source below is handed to BCC as text. Before that happens, the
# FILTER_PID, FILTER_LPORT and FILTER_DPORT tokens are textually replaced
# (see "code substitutions") with either an "if (...) return 0;" statement or
# an empty string, so they must stay on lines of their own.
#
# Overview of what the program declares:
#   birth       - hash: struct sock * -> timestamp (ns) taken on early states
#   whoami      - hash: struct sock * -> cached pid/comm (struct id_t)
#   ipv4_events - perf output carrying struct ipv4_data_t
#   ipv6_events - perf output carrying struct ipv6_data_t
#
# The field order of ipv4_data_t / ipv6_data_t is mirrored by the ctypes
# classes Data_ipv4 / Data_ipv6 in this file; change them together.
bpf_text = """
#include <uapi/linux/ptrace.h>
#define KBUILD_MODNAME "foo"
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_HASH(birth, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    // XXX: switch some to u32's when supported
    u64 ts_us;
    u64 pid;
    u64 saddr;
    u64 daddr;
    u64 ports;
    u64 rx_b;
    u64 tx_b;
    u64 span_us;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 pid;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 ports;
    u64 rx_b;
    u64 tx_b;
    u64 span_us;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);

struct id_t {
    u32 pid;
    char task[TASK_COMM_LEN];
};
BPF_HASH(whoami, struct sock *, struct id_t);

int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // lport is either used in a filter here, or later
    u16 lport = sk->__sk_common.skc_num;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = sk->__sk_common.skc_dport;
    FILTER_DPORT

    /*
     * This tool includes PID and comm context. It's best effort, and may
     * be wrong in some situations. It currently works like this:
     * - record timestamp on any state < TCP_FIN_WAIT1
     * - cache task context on:
     *       TCP_SYN_SENT: tracing from client
     *       TCP_LAST_ACK: client-closed from server
     * - do output on TCP_CLOSE:
     *       fetch task context if cached, or use current task
     */

    // capture birth time
    if (state < TCP_FIN_WAIT1) {
        /*
         * Matching just ESTABLISHED may be sufficient, provided no code-path
         * sets ESTABLISHED without a tcp_set_state() call. Until we know
         * that for sure, match all early states to increase chances a
         * timestamp is set.
         * Note that this needs to be set before the PID filter later on,
         * since the PID isn't reliable for these early stages, so we must
         * save all timestamps and do the PID filter later when we can.
         */
        u64 ts = bpf_ktime_get_ns();
        birth.update(&sk, &ts);
    }

    // record PID & comm on SYN_SENT
    if (state == TCP_SYN_SENT || state == TCP_LAST_ACK) {
        // now we can PID filter, both here and a little later on for CLOSE
        FILTER_PID
        struct id_t me = {.pid = pid};
        bpf_get_current_comm(&me.task, sizeof(me.task));
        whoami.update(&sk, &me);
    }

    if (state != TCP_CLOSE)
        return 0;

    // calculate lifespan
    u64 *tsp, delta_us;
    tsp = birth.lookup(&sk);
    if (tsp == 0) {
        whoami.delete(&sk);     // may not exist
        return 0;               // missed create
    }
    delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
    birth.delete(&sk);

    // fetch possible cached data, and filter
    struct id_t *mep;
    mep = whoami.lookup(&sk);
    if (mep != 0)
        pid = mep->pid;
    FILTER_PID

    // get throughput stats. see tcp_get_info().
    u64 rx_b = 0, tx_b = 0, sport = 0;
    struct tcp_sock *tp = (struct tcp_sock *)sk;
    rx_b = tp->bytes_received;
    tx_b = tp->bytes_acked;

    u16 family = sk->__sk_common.skc_family;

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        data4.saddr = sk->__sk_common.skc_rcv_saddr;
        data4.daddr = sk->__sk_common.skc_daddr;
        // a workaround until data4 compiles with separate lport/dport
        data4.pid = pid;
        data4.ports = ntohs(dport) + ((0ULL + lport) << 32);
        if (mep == 0) {
            bpf_get_current_comm(&data4.task, sizeof(data4.task));
        } else {
            bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task);
        }
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {.span_us = delta_us,
            .rx_b = rx_b, .tx_b = tx_b};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // a workaround until data6 compiles with separate lport/dport
        data6.ports = ntohs(dport) + ((0ULL + lport) << 32);
        data6.pid = pid;
        if (mep == 0) {
            bpf_get_current_comm(&data6.task, sizeof(data6.task));
        } else {
            bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task);
        }
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }

    if (mep != 0)
        whoami.delete(&sk);

    return 0;
}
"""
223
# code substitutions
# Each FILTER_* token in bpf_text becomes a C "if" statement when the matching
# option was given; any token left over is blanked out at the end.
if args.pid:
    # Validate like the ports below: int() rejects non-numeric input instead
    # of pasting arbitrary text into the C source.
    bpf_text = bpf_text.replace('FILTER_PID',
        'if (pid != %d) { return 0; }' % int(args.pid))
if args.remoteport:
    dports = [int(dport) for dport in args.remoteport.split(',')]
    # The BPF program compares the raw skc_dport value (it only applies
    # ntohs() when emitting the event), so byte-swap the constants here.
    dports_if = ' && '.join(['dport != %d' % ntohs(dport) for dport in dports])
    # A filtered-out socket must not leave a stale birth timestamp behind.
    bpf_text = bpf_text.replace('FILTER_DPORT',
        'if (%s) { birth.delete(&sk); return 0; }' % dports_if)
if args.localport:
    lports = [int(lport) for lport in args.localport.split(',')]
    lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
    bpf_text = bpf_text.replace('FILTER_LPORT',
        'if (%s) { birth.delete(&sk); return 0; }' % lports_if)
# Options that were not given: remove their placeholders entirely.
bpf_text = bpf_text.replace('FILTER_PID', '')
bpf_text = bpf_text.replace('FILTER_DPORT', '')
bpf_text = bpf_text.replace('FILTER_LPORT', '')

if debug:
    print(bpf_text)
244
# event data
TASK_COMM_LEN = 16      # linux/sched.h

# Shorthands for the two element types used by the event records.
_U64 = ct.c_ulonglong
_COMM = ct.c_char * TASK_COMM_LEN


class Data_ipv4(ct.Structure):
    """ctypes view of an IPv4 event record: eight 64-bit fields, then comm."""
    _fields_ = [(field, _U64) for field in (
        "ts_us", "pid", "saddr", "daddr",
        "ports", "rx_b", "tx_b", "span_us",
    )] + [("task", _COMM)]


class Data_ipv6(ct.Structure):
    """ctypes view of an IPv6 event record; addresses are two 64-bit words."""
    _fields_ = (
        [("ts_us", _U64), ("pid", _U64)]
        + [("saddr", _U64 * 2), ("daddr", _U64 * 2)]
        + [(field, _U64) for field in ("ports", "rx_b", "tx_b", "span_us")]
        + [("task", _COMM)]
    )
273
#
# Output layout selection
#
# The default layout below is deliberately limited to 80 columns, so please
# leave it alone. Namespaces, UIDs and similar extras are knowingly absent.
# If more columns are truly needed to solve a real problem, the suggested
# route is a separate extended mode (-x) that includes them.
#
# Precedence: csv wins over wide, wide wins over the default.
#
if args.csv:
    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
    format_string = "%d,%s,%s,%s,%s,%s,%d,%d,%d,%.2f"
elif args.wide:
    header_string = "%-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %6s %6s %s"
    format_string = "%-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %6d %6d %.2f"
else:
    header_string = "%-5s %-10.10s %s%-15s %-5s %-15s %-5s %5s %5s %s"
    format_string = "%-5d %-10.10s %s%-15s %-5d %-15s %-5d %5d %5d %.2f"
290
291# process event
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one finished IPv4 TCP session as a row.

    cpu and size are passed in by the perf buffer and are not used here;
    data is cast to a Data_ipv4 record. Optional TIME / TIME(s) columns are
    printed first, depending on the -T / -t options.
    """
    global start_ts
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    if args.time:
        if args.csv:
            print("%s," % strftime("%H:%M:%S"), end="")
        else:
            print("%-8s " % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        # timestamps are relative to the first event printed
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        if args.csv:
            print("%.6f," % delta_s, end="")
        else:
            print("%-9.6f " % delta_s, end="")
    # A comm is arbitrary bytes; 'replace' keeps a non-UTF-8 name from raising
    # UnicodeDecodeError inside the callback and aborting the trace.
    comm = event.task.decode('utf-8', 'replace')
    # ports packs lport in the upper 32 bits and dport in the lower 32 bits
    lport = event.ports >> 32
    dport = event.ports & 0xffffffff
    # floor division gives the same integer KB value on Python 2 and 3
    print(format_string % (event.pid, comm,
        "4" if args.wide or args.csv else "",
        inet_ntop(AF_INET, pack("I", event.saddr)), lport,
        inet_ntop(AF_INET, pack("I", event.daddr)), dport,
        event.tx_b // 1024, event.rx_b // 1024, float(event.span_us) / 1000))
313
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one finished IPv6 TCP session as a row.

    cpu and size are passed in by the perf buffer and are not used here;
    data is cast to a Data_ipv6 record. Optional TIME / TIME(s) columns are
    printed first, depending on the -T / -t options.
    """
    global start_ts
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    if args.time:
        if args.csv:
            print("%s," % strftime("%H:%M:%S"), end="")
        else:
            print("%-8s " % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        # timestamps are relative to the first event printed
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        if args.csv:
            print("%.6f," % delta_s, end="")
        else:
            print("%-9.6f " % delta_s, end="")
    # A comm is arbitrary bytes; 'replace' keeps a non-UTF-8 name from raising
    # UnicodeDecodeError inside the callback and aborting the trace.
    comm = event.task.decode('utf-8', 'replace')
    # ports packs lport in the upper 32 bits and dport in the lower 32 bits
    lport = event.ports >> 32
    dport = event.ports & 0xffffffff
    # floor division gives the same integer KB value on Python 2 and 3
    print(format_string % (event.pid, comm,
        "6" if args.wide or args.csv else "",
        inet_ntop(AF_INET6, event.saddr), lport,
        inet_ntop(AF_INET6, event.daddr), dport,
        event.tx_b // 1024, event.rx_b // 1024, float(event.span_us) / 1000))
335
# initialize BPF
b = BPF(text=bpf_text)

# header
# The optional TIME / TIME(s) columns are emitted first, matching the order
# used by the per-event printers.
if args.time:
    if args.csv:
        print("%s," % ("TIME"), end="")
    else:
        print("%-8s " % ("TIME"), end="")
if args.timestamp:
    if args.csv:
        print("%s," % ("TIME(s)"), end="")
    else:
        print("%-9s " % ("TIME(s)"), end="")
print(header_string % ("PID", "COMM",
    "IP" if args.wide or args.csv else "", "LADDR",
    "LPORT", "RADDR", "RPORT", "TX_KB", "RX_KB", "MS"))

# Timestamp (us) of the first event seen; 0 means "not set yet". The event
# printers update it through "global start_ts".
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
while 1:
    try:
        b.kprobe_poll()
    except KeyboardInterrupt:
        # Ctrl-C is how the tool is stopped; leave without a traceback.
        break