blob: cb38c591d8f30311b0a49c7e796f9492ebd2f2d8 [file] [log] [blame]
Alexey Ivanovcc01a9c2019-01-16 09:50:46 -08001#!/usr/bin/python
Gerald Combsabdca972018-11-26 23:37:24 -07002# -*- coding: utf-8 -*-
Brendan Greggbbd9acd2018-03-20 18:35:12 -07003# @lint-avoid-python-3-compatibility-imports
4#
5# tcpstates Trace the TCP session state changes with durations.
6# For Linux, uses BCC, BPF. Embedded C.
7#
# USAGE: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT] [-Y] [-4 | -6]
Brendan Greggbbd9acd2018-03-20 18:35:12 -07009#
10# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
11# Linux 4.16 also adds more state transitions so that they can be traced.
12#
13# Copyright 2018 Netflix, Inc.
14# Licensed under the Apache License, Version 2.0 (the "License")
15#
16# 20-Mar-2018 Brendan Gregg Created this.
17
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from socket import inet_ntop, AF_INET, AF_INET6
22from struct import pack
Gerald Combsabdca972018-11-26 23:37:24 -070023from time import strftime, time
24from os import getuid
Brendan Greggbbd9acd2018-03-20 18:35:12 -070025
26# arguments
examples = """examples:
    ./tcpstates           # trace all TCP state changes
    ./tcpstates -t        # include timestamp column
    ./tcpstates -T        # include time column (HH:MM:SS)
    ./tcpstates -w        # wider columns (fit IPv6)
    ./tcpstates -stT      # csv output, with times & timestamps
    ./tcpstates -Y        # log events to the systemd journal
    ./tcpstates -L 80     # only trace local port 80
    ./tcpstates -L 80,81  # only trace local ports 80 and 81
    ./tcpstates -D 80     # only trace remote port 80
    ./tcpstates -4        # trace IPv4 family only
    ./tcpstates -6        # trace IPv6 family only
"""
parser = argparse.ArgumentParser(
    description="Trace TCP session state changes and durations",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

# Output-shaping switches. They are registered in this order so that the
# generated --help text lists them in the same sequence.
for short_opt, long_opt, help_text in (
        ("-T", "--time", "include time column on output (HH:MM:SS)"),
        ("-t", "--timestamp", "include timestamp on output (seconds)"),
        ("-w", "--wide", "wide column output (fits IPv6 addresses)"),
        ("-s", "--csv", "comma separated values output")):
    parser.add_argument(short_opt, long_opt, action="store_true",
                        help=help_text)

# Port filters take a comma-separated list; the raw string is parsed later
# when the BPF filter expressions are generated.
parser.add_argument("-L", "--localport",
                    help="comma-separated list of local ports to trace.")
parser.add_argument("-D", "--remoteport",
                    help="comma-separated list of remote ports to trace.")

# --ebpf is hidden from --help: it prints the generated BPF program and exits.
parser.add_argument("--ebpf", action="store_true",
                    help=argparse.SUPPRESS)
parser.add_argument("-Y", "--journal", action="store_true",
                    help="log session state changes to the systemd journal")

# -4 and -6 cannot be combined.
group = parser.add_mutually_exclusive_group()
group.add_argument("-4", "--ipv4", action="store_true",
                   help="trace IPv4 family only")
group.add_argument("-6", "--ipv6", action="store_true",
                   help="trace IPv6 family only")

args = parser.parse_args()
# Set to a non-zero value to print the generated BPF program before loading.
debug = 0
67
68# define BPF program
# Shared BPF C prologue; one of the two probe bodies defined below is
# appended to it to form the final program. It declares:
#   - `last`: a hash keyed by `struct sock *` with a u64 value (the probe
#     bodies store a bpf_ktime_get_ns() timestamp per socket in it);
#   - ipv4_data_t / ipv6_data_t: the per-event records, identical except for
#     the type of saddr/daddr (u32 vs. unsigned __int128);
#   - ipv4_events / ipv6_events: the perf outputs consumed by the Python
#     callbacks print_ipv4_event / print_ipv6_event;
#   - id_t: a pid/comm pair that neither probe body in this file references.
bpf_header = """
#include <uapi/linux/ptrace.h>
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_HASH(last, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u64 skaddr;
    u32 saddr;
    u32 daddr;
    u64 span_us;
    u32 pid;
    u32 ports;
    u32 oldstate;
    u32 newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 skaddr;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 span_us;
    u32 pid;
    u32 ports;
    u32 oldstate;
    u32 newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);

struct id_t {
    u32 pid;
    char task[TASK_COMM_LEN];
};
"""
# Probe body used when the sock:inet_sock_set_state tracepoint exists.
#
# FILTER_LPORT, FILTER_DPORT and FILTER_FAMILY are placeholder tokens, not C:
# the Python code further down replaces each one with a generated `if`
# statement (or with an empty string) before the program is compiled.
#
# Per event the probe:
#   - ignores anything whose protocol is not IPPROTO_TCP;
#   - computes span_us as the microseconds since the timestamp stored in
#     `last` for this socket, or 0 when no timestamp is stored;
#   - packs both ports into one u32 (`ports`): lport in the upper 16 bits,
#     dport in the lower 16 bits;
#   - submits an ipv4_data_t when family == AF_INET, otherwise an ipv6_data_t;
#   - deletes the socket's entry from `last` when the new state is TCP_CLOSE,
#     otherwise stores the current timestamp for the next transition.
bpf_text_tracepoint = """
TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
    u16 lport = args->sport;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = args->dport;
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
    u16 family = args->family;
    FILTER_FAMILY

    if (args->family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = args->oldstate,
            .newstate = args->newstate };
        data4.skaddr = (u64)args->skaddr;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
        // a workaround until data4 compiles with separate lport/dport
        data4.ports = dport + ((0ULL + lport) << 16);
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = args->oldstate,
            .newstate = args->newstate };
        data6.skaddr = (u64)args->skaddr;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
        // a workaround until data6 compiles with separate lport/dport
        data6.ports = dport + ((0ULL + lport) << 16);
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }

    if (args->newstate == TCP_CLOSE) {
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;
}
"""
181
# Fallback probe body, used when the sock:inet_sock_set_state tracepoint does
# not exist: a kprobe on tcp_set_state(sk, state).
#
# It mirrors the tracepoint body, with these differences visible in the code:
#   - ports, family and addresses are read from sk->__sk_common rather than
#     from tracepoint arguments; dport is converted with ntohs();
#   - the old state is the socket's current skc_state and the new state is
#     the `state` argument;
#   - IPv6 addresses are copied with bpf_probe_read_kernel();
#   - there is no protocol check.
#
# FILTER_LPORT, FILTER_DPORT and FILTER_FAMILY are placeholder tokens that the
# Python code further down replaces before the program is compiled.
bpf_text_kprobe = """
int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID

    // lport is either used in a filter here, or later
    u16 lport = sk->__sk_common.skc_num;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = sk->__sk_common.skc_dport;
    dport = ntohs(dport);
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;

    u16 family = sk->__sk_common.skc_family;
    FILTER_FAMILY

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = sk->__sk_common.skc_state,
            .newstate = state };
        data4.skaddr = (u64)sk;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        data4.saddr = sk->__sk_common.skc_rcv_saddr;
        data4.daddr = sk->__sk_common.skc_daddr;
        // a workaround until data4 compiles with separate lport/dport
        data4.ports = dport + ((0ULL + lport) << 16);
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = sk->__sk_common.skc_state,
            .newstate = state };
        data6.skaddr = (u64)sk;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // a workaround until data6 compiles with separate lport/dport
        data6.ports = dport + ((0ULL + lport) << 16);
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }

    if (state == TCP_CLOSE) {
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;

};
"""
253
# Use the tracepoint body when the kernel exposes sock:inet_sock_set_state,
# otherwise fall back to the tcp_set_state kprobe body.
if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    bpf_text = bpf_header + bpf_text_tracepoint
else:
    bpf_text = bpf_header + bpf_text_kprobe

# code substitutions
#
# A port filter becomes "if (port != A && port != B ...)": an event whose port
# matches none of the requested values drops the socket's stored timestamp
# and is skipped.
if args.remoteport:
    dports = [int(port) for port in args.remoteport.split(',')]
    dports_if = ' && '.join('dport != %d' % port for port in dports)
    bpf_text = bpf_text.replace(
        'FILTER_DPORT',
        'if (%s) { last.delete(&sk); return 0; }' % dports_if)
if args.localport:
    lports = [int(port) for port in args.localport.split(',')]
    lports_if = ' && '.join('lport != %d' % port for port in lports)
    bpf_text = bpf_text.replace(
        'FILTER_LPORT',
        'if (%s) { last.delete(&sk); return 0; }' % lports_if)

# Address-family filter: at most one of -4 / -6 can be set.
if args.ipv4:
    family_filter = 'if (family != AF_INET) { return 0; }'
elif args.ipv6:
    family_filter = 'if (family != AF_INET6) { return 0; }'
else:
    family_filter = ''
bpf_text = bpf_text.replace('FILTER_FAMILY', family_filter)

# Blank out the port placeholders that no option filled in.
for placeholder in ('FILTER_DPORT', 'FILTER_LPORT'):
    bpf_text = bpf_text.replace(placeholder, '')

# Dump the generated program on request; --ebpf stops right after printing.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()
285
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700286#
287# Setup output formats
288#
289# Don't change the default output (next 2 lines): this fits in 80 chars. I
290# know it doesn't have NS or UIDs etc. I know. If you really, really, really
291# need to add columns, columns that solve real actual problems, I'd start by
# adding an extended mode (-x) to include those columns.
293#
header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s "
                 "-> %-11s %.3f")

# Optional layouts override the default one; when both -s and -w are given
# the CSV layout is the one that takes effect.
if args.csv:
    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
    format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f"
elif args.wide:
    header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s "
                     "-> %-11s %s")
    format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s "
                     "-> %-11s %.3f")

# The systemd bindings are only imported when journal logging was requested,
# so the tool still runs without them otherwise.
if args.journal:
    try:
        from systemd import journal
    except ImportError:
        print("ERROR: Journal logging requires the systemd.journal module")
        exit(1)
312
313
def tcpstate2str(state):
    """Return the symbolic name for a TCP state number.

    Numbers without a known name are returned as their decimal string.
    """
    # from include/net/tcp_states.h (state numbers start at 1):
    names = (
        "ESTABLISHED",
        "SYN_SENT",
        "SYN_RECV",
        "FIN_WAIT1",
        "FIN_WAIT2",
        "TIME_WAIT",
        "CLOSE",
        "CLOSE_WAIT",
        "LAST_ACK",
        "LISTEN",
        "CLOSING",
        "NEW_SYN_RECV",
    )
    by_number = dict(enumerate(names, 1))
    return by_number.get(state, str(state))
335
def journal_fields(event, addr_family):
    """Build the keyword arguments for journal.send() for one state change.

    event: an IPv4 or IPv6 perf event record (pid, task, saddr, daddr, ports,
        oldstate, newstate, span_us are read).
    addr_family: AF_INET or AF_INET6; selects the field-name prefix and how
        the addresses are converted to text.

    Returns a dict of journal fields, including a human-readable MESSAGE.
    """
    if addr_family == AF_INET6:
        addr_pfx = 'IPV6'
        # IPv6 events carry the addresses as unsigned __int128 values, which
        # are not integers that pack("I", ...) can handle. They are handed to
        # inet_ntop as-is, exactly as print_ipv6_event does.
        saddr_raw = event.saddr
        daddr_raw = event.daddr
    else:
        addr_pfx = 'IPV4'
        # IPv4 events carry the addresses as u32 values; inet_ntop needs the
        # 4 raw bytes.
        saddr_raw = pack("I", event.saddr)
        daddr_raw = pack("I", event.daddr)

    fields = {
        # Standard fields described in systemd.journal-fields(7). journal.send
        # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're
        # root and specify OBJECT_PID, systemd-journald will add other OBJECT_*
        # fields for us.
        'SYSLOG_IDENTIFIER': 'tcpstates',
        'PRIORITY': 5,
        '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000,
        'OBJECT_PID': str(event.pid),
        'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
        # Custom fields, aka "stuff we sort of made up".
        'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, saddr_raw),
        # ports holds the local port in the upper 16 bits and the remote port
        # in the lower 16 bits.
        'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 16),
        'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, daddr_raw),
        'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffff),
        'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
        'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
        'OBJECT_TCP_SPAN_TIME': str(event.span_us)
        }

    msg_format_string = (u"%(OBJECT_COMM)s " +
        u"%(OBJECT_" + addr_pfx + "_SOURCE_ADDRESS)s " +
        u"%(OBJECT_TCP_SOURCE_PORT)s → " +
        u"%(OBJECT_" + addr_pfx + "_DESTINATION_ADDRESS)s " +
        u"%(OBJECT_TCP_DESTINATION_PORT)s " +
        u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s")
    # MESSAGE is rendered before OBJECT_COMM is (possibly) removed below,
    # because the format string references it.
    fields['MESSAGE'] = msg_format_string % (fields)

    if getuid() == 0:
        del fields['OBJECT_COMM'] # Handled by systemd-journald

    return fields
373
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700374# process event
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one IPv4 state-change event as a row.

    cpu and size are accepted for the callback signature but not used.
    When -Y was given, the event is also sent to the systemd journal.
    """
    global start_ts
    event = b["ipv4_events"].event(data)

    # Optional leading columns: wall-clock time, then seconds since the
    # first event that was printed with a timestamp.
    if args.time:
        time_fmt = "%s," if args.csv else "%-8s "
        print(time_fmt % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        stamp_fmt = "%.6f," if args.csv else "%-9.6f "
        print(stamp_fmt % delta_s, end="")

    # The IP-version column is only present in the wide and CSV layouts.
    ip_version = "4" if args.wide or args.csv else ""
    local_port = event.ports >> 16
    remote_port = event.ports & 0xffff
    row = (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
           ip_version,
           inet_ntop(AF_INET, pack("I", event.saddr)), local_port,
           inet_ntop(AF_INET, pack("I", event.daddr)), remote_port,
           tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
           float(event.span_us) / 1000)
    print(format_string % row)

    if args.journal:
        journal.send(**journal_fields(event, AF_INET))
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700399
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one IPv6 state-change event as a row.

    cpu and size are accepted for the callback signature but not used.
    When -Y was given, the event is also sent to the systemd journal.
    """
    global start_ts
    event = b["ipv6_events"].event(data)

    # Optional leading columns: wall-clock time, then seconds since the
    # first event that was printed with a timestamp.
    if args.time:
        time_fmt = "%s," if args.csv else "%-8s "
        print(time_fmt % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        stamp_fmt = "%.6f," if args.csv else "%-9.6f "
        print(stamp_fmt % delta_s, end="")

    # The IP-version column is only present in the wide and CSV layouts.
    ip_version = "6" if args.wide or args.csv else ""
    local_port = event.ports >> 16
    remote_port = event.ports & 0xffff
    # The addresses are handed to inet_ntop without any packing step.
    row = (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
           ip_version,
           inet_ntop(AF_INET6, event.saddr), local_port,
           inet_ntop(AF_INET6, event.daddr), remote_port,
           tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
           float(event.span_us) / 1000)
    print(format_string % row)

    if args.journal:
        journal.send(**journal_fields(event, AF_INET6))
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700424
425# initialize BPF
b = BPF(text=bpf_text)

# header: the optional time columns come first, in the same order and
# widths the event callbacks use for their values.
if args.time:
    print(("%s," if args.csv else "%-8s ") % ("TIME"), end="")
if args.timestamp:
    print(("%s," if args.csv else "%-9s ") % ("TIME(s)"), end="")
ip_heading = "IP" if args.wide or args.csv else ""
print(header_string % ("SKADDR", "C-PID", "C-COMM", ip_heading,
                       "LADDR", "LPORT", "RADDR", "RPORT",
                       "OLDSTATE", "NEWSTATE", "MS"))

# Timestamp (us) of the first event printed with -t; 0 means "not set yet".
start_ts = 0

# read events: poll both perf buffers until interrupted with Ctrl-C.
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()