#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcpstates   Trace the TCP session state changes with durations.
#             For Linux, uses BCC, BPF. Embedded C.
#
# USAGE: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT]
#
# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
# Linux 4.16 also adds more state transitions so that they can be traced.
#
# Copyright 2018 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Mar-2018   Brendan Gregg   Created this.

from __future__ import print_function

import argparse
import ctypes as ct
import sys
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
from time import strftime

from bcc import BPF

# arguments
#
# The epilog is shown verbatim under --help (RawDescriptionHelpFormatter keeps
# its layout), so the examples are kept column-aligned by hand.
examples = """examples:
    ./tcpstates           # trace all TCP state changes
    ./tcpstates -t        # include timestamp column
    ./tcpstates -T        # include time column (HH:MM:SS)
    ./tcpstates -w        # wider columns (fit IPv6)
    ./tcpstates -stT      # csv output, with times & timestamps
    ./tcpstates -L 80     # only trace local port 80
    ./tcpstates -L 80,81  # only trace local ports 80 and 81
    ./tcpstates -D 80     # only trace remote port 80
"""
parser = argparse.ArgumentParser(
    description="Trace TCP session state changes and durations",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--time", action="store_true",
    help="include time column on output (HH:MM:SS)")
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output (seconds)")
parser.add_argument("-w", "--wide", action="store_true",
    help="wide column output (fits IPv6 addresses)")
parser.add_argument("-s", "--csv", action="store_true",
    help="comma separated values output")
parser.add_argument("-L", "--localport",
    help="comma-separated list of local ports to trace.")
parser.add_argument("-D", "--remoteport",
    help="comma-separated list of remote ports to trace.")
# hidden from --help: print the generated BPF C program and exit
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# set to a true value to print the generated BPF C program before loading it
debug = 0

# define BPF program
#
# FILTER_LPORT and FILTER_DPORT are placeholders; they are textually replaced
# below with port-filter code (or with nothing) depending on -L / -D.
bpf_text = """
#include <uapi/linux/ptrace.h>
#define KBUILD_MODNAME "foo"
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

// time (ns) of the previous state change, keyed by socket address
BPF_HASH(last, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u64 skaddr;
    u64 saddr;
    u64 daddr;
    u64 span_us;
    u32 pid;
    u32 ports;      // (lport << 16) | dport
    u32 oldstate;
    u32 newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 skaddr;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 span_us;
    u32 pid;
    u32 ports;      // (lport << 16) | dport
    u32 oldstate;
    u32 newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);

struct id_t {
    u32 pid;
    char task[TASK_COMM_LEN];
};

TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
    u16 lport = args->sport;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = args->dport;
    FILTER_DPORT

    // one clock sample per event, so the reported timestamp, the span and
    // the stored "last change" time all agree with each other
    u64 now = bpf_ktime_get_ns();

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (now - *tsp) / 1000;

    if (args->family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = args->oldstate, .newstate = args->newstate};
        data4.skaddr = (u64)args->skaddr;
        data4.ts_us = now / 1000;
        bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr);
        bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr);
        // a workaround until data4 compiles with separate lport/dport:
        // both 16-bit ports share the single u32
        data4.ports = ((u32)lport << 16) | dport;
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = args->oldstate, .newstate = args->newstate};
        data6.skaddr = (u64)args->skaddr;
        data6.ts_us = now / 1000;
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->daddr_v6);
        // a workaround until data6 compiles with separate lport/dport:
        // both 16-bit ports share the single u32
        data6.ports = ((u32)lport << 16) | dport;
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }

    if (args->newstate == TCP_CLOSE) {
        // the session is over: drop its entry so the map does not fill up
        last.delete(&sk);
    } else {
        last.update(&sk, &now);
    }

    return 0;
}
"""

if not BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    print("ERROR: tracepoint sock:inet_sock_set_state missing "
        "(added in Linux 4.16). Exiting")
    # this is a failure, so do not report success to the caller
    sys.exit(1)


# code substitutions
def _port_filter(var, ports_arg):
    """Return C code that drops the event unless `var` is a listed port.

    var is the name of the C variable to test ("lport" or "dport") and
    ports_arg is the comma-separated port list given on the command line.
    Raises ValueError if an entry of ports_arg is not an integer.
    """
    ports = [int(port) for port in ports_arg.split(',')]
    no_match = ' && '.join('%s != %d' % (var, port) for port in ports)
    return 'if (%s) { last.delete(&sk); return 0; }' % no_match


bpf_text = bpf_text.replace('FILTER_DPORT',
    _port_filter('dport', args.remoteport) if args.remoteport else '')
bpf_text = bpf_text.replace('FILTER_LPORT',
    _port_filter('lport', args.localport) if args.localport else '')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        sys.exit()

# event data
TASK_COMM_LEN = 16      # linux/sched.h


# The two classes below must mirror struct ipv4_data_t / struct ipv6_data_t
# in bpf_text field for field, as the perf buffer payload is cast to them.
class Data_ipv4(ct.Structure):
    _fields_ = [
        ("ts_us", ct.c_ulonglong),
        ("skaddr", ct.c_ulonglong),
        ("saddr", ct.c_ulonglong),
        ("daddr", ct.c_ulonglong),
        ("span_us", ct.c_ulonglong),
        ("pid", ct.c_uint),
        ("ports", ct.c_uint),
        ("oldstate", ct.c_uint),
        ("newstate", ct.c_uint),
        ("task", ct.c_char * TASK_COMM_LEN)
    ]


class Data_ipv6(ct.Structure):
    _fields_ = [
        ("ts_us", ct.c_ulonglong),
        ("skaddr", ct.c_ulonglong),
        ("saddr", (ct.c_ulonglong * 2)),
        ("daddr", (ct.c_ulonglong * 2)),
        ("span_us", ct.c_ulonglong),
        ("pid", ct.c_uint),
        ("ports", ct.c_uint),
        ("oldstate", ct.c_uint),
        ("newstate", ct.c_uint),
        ("task", ct.c_char * TASK_COMM_LEN)
    ]


#
# Setup output formats
#
# Don't change the default output (next 2 lines): this fits in 80 chars. I
# know it doesn't have NS or UIDs etc. I know. If you really, really, really
# need to add columns, columns that solve real actual problems, I'd start by
# adding an extended mode (-x) to include those columns.
#
header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
    "-> %-11s %.3f")
if args.wide:
    header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " +
        "-> %-11s %s")
    format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5d %-26s %-5d %-11s " +
        "-> %-11s %.3f")
if args.csv:
    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
    format_string = "%x,%d,%s,%s,%s,%d,%s,%d,%s,%s,%.3f"

# from include/net/tcp_states.h:
_TCP_STATES = {
    1: "ESTABLISHED",
    2: "SYN_SENT",
    3: "SYN_RECV",
    4: "FIN_WAIT1",
    5: "FIN_WAIT2",
    6: "TIME_WAIT",
    7: "CLOSE",
    8: "CLOSE_WAIT",
    9: "LAST_ACK",
    10: "LISTEN",
    11: "CLOSING",
    12: "NEW_SYN_RECV",
}


def tcpstate2str(state):
    """Return the name of TCP state number `state`.

    Numbers that are not in the table are returned as their decimal string.
    """
    return _TCP_STATES.get(state, str(state))


def _print_event(event, ip_version, laddr, raddr):
    """Print one output line for a decoded event.

    event is a Data_ipv4 or Data_ipv6 instance, ip_version is "4" or "6",
    and laddr / raddr are the already-formatted local and remote addresses.
    """
    global start_ts
    if args.time:
        if args.csv:
            print("%s," % strftime("%H:%M:%S"), end="")
        else:
            print("%-8s " % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        # the timestamp column is relative to the first event printed
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        if args.csv:
            print("%.6f," % delta_s, end="")
        else:
            print("%-9.6f " % delta_s, end="")
    print(format_string % (event.skaddr, event.pid,
        # comm is arbitrary bytes; never let a bad name abort the trace
        event.task.decode('utf-8', 'replace'),
        ip_version if args.wide or args.csv else "",
        # ports is (lport << 16) | dport, as packed by the BPF program
        laddr, event.ports >> 16,
        raddr, event.ports & 0xffff,
        tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
        float(event.span_us) / 1000))


# process event
def print_ipv4_event(cpu, data, size):
    """Perf buffer callback for ipv4_events: decode and print one event."""
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    _print_event(event, "4",
        inet_ntop(AF_INET, pack("I", event.saddr)),
        inet_ntop(AF_INET, pack("I", event.daddr)))


def print_ipv6_event(cpu, data, size):
    """Perf buffer callback for ipv6_events: decode and print one event."""
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    _print_event(event, "6",
        inet_ntop(AF_INET6, event.saddr),
        inet_ntop(AF_INET6, event.daddr))

# initialize BPF
b = BPF(text=bpf_text)

# header
if args.time:
    if args.csv:
        print("%s," % ("TIME"), end="")
    else:
        print("%-8s " % ("TIME"), end="")
if args.timestamp:
    if args.csv:
        print("%s," % ("TIME(s)"), end="")
    else:
        print("%-9s " % ("TIME(s)"), end="")
print(header_string % ("SKADDR", "C-PID", "C-COMM",
    "IP" if args.wide or args.csv else "",
    "LADDR", "LPORT", "RADDR", "RPORT",
    "OLDSTATE", "NEWSTATE", "MS"))

# ts_us of the first event seen with -t; the event callbacks set it and
# print the TIME(s) column relative to it
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing: exit without a traceback
        sys.exit()