blob: 9b25b094a48c3897c8deaabac13bb318f9e8e319 [file] [log] [blame]
Alexey Ivanovcc01a9c2019-01-16 09:50:46 -08001#!/usr/bin/python
Gerald Combsabdca972018-11-26 23:37:24 -07002# -*- coding: utf-8 -*-
Brendan Greggbbd9acd2018-03-20 18:35:12 -07003# @lint-avoid-python-3-compatibility-imports
4#
5# tcpstates Trace the TCP session state changes with durations.
6# For Linux, uses BCC, BPF. Embedded C.
7#
8# USAGE: tcpstates [-h] [-C] [-S] [interval [count]]
9#
10# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
11# Linux 4.16 also adds more state transitions so that they can be traced.
12#
13# Copyright 2018 Netflix, Inc.
14# Licensed under the Apache License, Version 2.0 (the "License")
15#
16# 20-Mar-2018 Brendan Gregg Created this.
17
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from socket import inet_ntop, AF_INET, AF_INET6
22from struct import pack
Gerald Combsabdca972018-11-26 23:37:24 -070023from time import strftime, time
24from os import getuid
Brendan Greggbbd9acd2018-03-20 18:35:12 -070025
26# arguments
27examples = """examples:
28 ./tcpstates # trace all TCP state changes
29 ./tcpstates -t # include timestamp column
30 ./tcpstates -T # include time column (HH:MM:SS)
31 ./tcpstates -w # wider colums (fit IPv6)
32 ./tcpstates -stT # csv output, with times & timestamps
Gerald Combsabdca972018-11-26 23:37:24 -070033 ./tcpstates -Y # log events to the systemd journal
Brendan Greggbbd9acd2018-03-20 18:35:12 -070034 ./tcpstates -L 80 # only trace local port 80
35 ./tcpstates -L 80,81 # only trace local ports 80 and 81
36 ./tcpstates -D 80 # only trace remote port 80
37"""
38parser = argparse.ArgumentParser(
39 description="Trace TCP session state changes and durations",
40 formatter_class=argparse.RawDescriptionHelpFormatter,
41 epilog=examples)
42parser.add_argument("-T", "--time", action="store_true",
43 help="include time column on output (HH:MM:SS)")
44parser.add_argument("-t", "--timestamp", action="store_true",
45 help="include timestamp on output (seconds)")
46parser.add_argument("-w", "--wide", action="store_true",
47 help="wide column output (fits IPv6 addresses)")
48parser.add_argument("-s", "--csv", action="store_true",
49 help="comma separated values output")
50parser.add_argument("-L", "--localport",
51 help="comma-separated list of local ports to trace.")
52parser.add_argument("-D", "--remoteport",
53 help="comma-separated list of remote ports to trace.")
54parser.add_argument("--ebpf", action="store_true",
55 help=argparse.SUPPRESS)
Gerald Combsabdca972018-11-26 23:37:24 -070056parser.add_argument("-Y", "--journal", action="store_true",
57 help="log session state changes to the systemd journal")
Brendan Greggbbd9acd2018-03-20 18:35:12 -070058args = parser.parse_args()
59debug = 0
60
61# define BPF program
62bpf_text = """
63#include <uapi/linux/ptrace.h>
64#define KBUILD_MODNAME "foo"
65#include <linux/tcp.h>
66#include <net/sock.h>
67#include <bcc/proto.h>
68
69BPF_HASH(last, struct sock *, u64);
70
71// separate data structs for ipv4 and ipv6
72struct ipv4_data_t {
Brendan Greggbbd9acd2018-03-20 18:35:12 -070073 u64 ts_us;
74 u64 skaddr;
Joe Yin36ce1122018-08-17 06:04:00 +080075 u32 saddr;
76 u32 daddr;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070077 u64 span_us;
Brendan Gregg2b23de62018-03-21 15:41:16 -070078 u32 pid;
79 u32 ports;
80 u32 oldstate;
81 u32 newstate;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070082 char task[TASK_COMM_LEN];
83};
84BPF_PERF_OUTPUT(ipv4_events);
85
86struct ipv6_data_t {
87 u64 ts_us;
88 u64 skaddr;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070089 unsigned __int128 saddr;
90 unsigned __int128 daddr;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070091 u64 span_us;
Brendan Gregg2b23de62018-03-21 15:41:16 -070092 u32 pid;
93 u32 ports;
94 u32 oldstate;
95 u32 newstate;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070096 char task[TASK_COMM_LEN];
97};
98BPF_PERF_OUTPUT(ipv6_events);
99
100struct id_t {
101 u32 pid;
102 char task[TASK_COMM_LEN];
103};
104
105TRACEPOINT_PROBE(sock, inet_sock_set_state)
106{
107 if (args->protocol != IPPROTO_TCP)
108 return 0;
109
110 u32 pid = bpf_get_current_pid_tgid() >> 32;
111 // sk is used as a UUID
112 struct sock *sk = (struct sock *)args->skaddr;
113
114 // lport is either used in a filter here, or later
115 u16 lport = args->sport;
116 FILTER_LPORT
117
118 // dport is either used in a filter here, or later
119 u16 dport = args->dport;
120 FILTER_DPORT
121
122 // calculate delta
123 u64 *tsp, delta_us;
124 tsp = last.lookup(&sk);
125 if (tsp == 0)
126 delta_us = 0;
127 else
128 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
129
130 if (args->family == AF_INET) {
131 struct ipv4_data_t data4 = {
132 .span_us = delta_us,
Marko Myllynenbfbf17e2018-09-11 21:49:58 +0300133 .oldstate = args->oldstate,
134 .newstate = args->newstate };
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700135 data4.skaddr = (u64)args->skaddr;
136 data4.ts_us = bpf_ktime_get_ns() / 1000;
Joe Yin36ce1122018-08-17 06:04:00 +0800137 __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
138 __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700139 // a workaround until data4 compiles with separate lport/dport
140 data4.ports = dport + ((0ULL + lport) << 32);
141 data4.pid = pid;
142
143 bpf_get_current_comm(&data4.task, sizeof(data4.task));
144 ipv4_events.perf_submit(args, &data4, sizeof(data4));
145
146 } else /* 6 */ {
147 struct ipv6_data_t data6 = {
148 .span_us = delta_us,
Marko Myllynenbfbf17e2018-09-11 21:49:58 +0300149 .oldstate = args->oldstate,
150 .newstate = args->newstate };
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700151 data6.skaddr = (u64)args->skaddr;
152 data6.ts_us = bpf_ktime_get_ns() / 1000;
Joe Yin36ce1122018-08-17 06:04:00 +0800153 __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
154 __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700155 // a workaround until data6 compiles with separate lport/dport
156 data6.ports = dport + ((0ULL + lport) << 32);
157 data6.pid = pid;
158 bpf_get_current_comm(&data6.task, sizeof(data6.task));
159 ipv6_events.perf_submit(args, &data6, sizeof(data6));
160 }
161
162 u64 ts = bpf_ktime_get_ns();
163 last.update(&sk, &ts);
164
165 return 0;
166}
167"""
168
169if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")):
170 print("ERROR: tracepoint sock:inet_sock_set_state missing "
171 "(added in Linux 4.16). Exiting")
172 exit()
173
174# code substitutions
175if args.remoteport:
176 dports = [int(dport) for dport in args.remoteport.split(',')]
177 dports_if = ' && '.join(['dport != %d' % dport for dport in dports])
178 bpf_text = bpf_text.replace('FILTER_DPORT',
179 'if (%s) { last.delete(&sk); return 0; }' % dports_if)
180if args.localport:
181 lports = [int(lport) for lport in args.localport.split(',')]
182 lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
183 bpf_text = bpf_text.replace('FILTER_LPORT',
184 'if (%s) { last.delete(&sk); return 0; }' % lports_if)
185bpf_text = bpf_text.replace('FILTER_DPORT', '')
186bpf_text = bpf_text.replace('FILTER_LPORT', '')
187
188if debug or args.ebpf:
189 print(bpf_text)
190 if args.ebpf:
191 exit()
192
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700193#
194# Setup output formats
195#
196# Don't change the default output (next 2 lines): this fits in 80 chars. I
197# know it doesn't have NS or UIDs etc. I know. If you really, really, really
198# need to add columns, columns that solve real actual problems, I'd start by
199# adding an extended mode (-x) to included those columns.
200#
201header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
202format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
203 "-> %-11s %.3f")
204if args.wide:
205 header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " +
206 "-> %-11s %s")
207 format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s " +
208 "-> %-11s %.3f")
209if args.csv:
210 header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
211 format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f"
212
Gerald Combsabdca972018-11-26 23:37:24 -0700213if args.journal:
214 try:
215 from systemd import journal
216 except ImportError:
217 print("ERROR: Journal logging requires the systemd.journal module")
218 exit(1)
219
220
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700221def tcpstate2str(state):
222 # from include/net/tcp_states.h:
223 tcpstate = {
224 1: "ESTABLISHED",
225 2: "SYN_SENT",
226 3: "SYN_RECV",
227 4: "FIN_WAIT1",
228 5: "FIN_WAIT2",
229 6: "TIME_WAIT",
230 7: "CLOSE",
231 8: "CLOSE_WAIT",
232 9: "LAST_ACK",
233 10: "LISTEN",
234 11: "CLOSING",
235 12: "NEW_SYN_RECV",
236 }
237
238 if state in tcpstate:
239 return tcpstate[state]
240 else:
241 return str(state)
242
Gerald Combsabdca972018-11-26 23:37:24 -0700243def journal_fields(event, addr_family):
244 addr_pfx = 'IPV4'
245 if addr_family == AF_INET6:
246 addr_pfx = 'IPV6'
247
248 fields = {
249 # Standard fields described in systemd.journal-fields(7). journal.send
250 # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're
251 # root and specify OBJECT_PID, systemd-journald will add other OBJECT_*
252 # fields for us.
253 'SYSLOG_IDENTIFIER': 'tcpstates',
254 'PRIORITY': 5,
255 '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000,
256 'OBJECT_PID': str(event.pid),
257 'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
258 # Custom fields, aka "stuff we sort of made up".
259 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
260 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 32),
261 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
262 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffffffff),
263 'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
264 'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
265 'OBJECT_TCP_SPAN_TIME': str(event.span_us)
266 }
267
268 msg_format_string = (u"%(OBJECT_COMM)s " +
269 u"%(OBJECT_" + addr_pfx + "_SOURCE_ADDRESS)s " +
270 u"%(OBJECT_TCP_SOURCE_PORT)s → " +
271 u"%(OBJECT_" + addr_pfx + "_DESTINATION_ADDRESS)s " +
272 u"%(OBJECT_TCP_DESTINATION_PORT)s " +
273 u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s")
274 fields['MESSAGE'] = msg_format_string % (fields)
275
276 if getuid() == 0:
277 del fields['OBJECT_COMM'] # Handled by systemd-journald
278
279 return fields
280
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700281# process event
282def print_ipv4_event(cpu, data, size):
Xiaozhou Liu51d62d32019-02-15 13:03:05 +0800283 event = b["ipv4_events"].event(data)
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700284 global start_ts
285 if args.time:
286 if args.csv:
287 print("%s," % strftime("%H:%M:%S"), end="")
288 else:
289 print("%-8s " % strftime("%H:%M:%S"), end="")
290 if args.timestamp:
291 if start_ts == 0:
292 start_ts = event.ts_us
293 delta_s = (float(event.ts_us) - start_ts) / 1000000
294 if args.csv:
295 print("%.6f," % delta_s, end="")
296 else:
297 print("%-9.6f " % delta_s, end="")
jeromemarchandb96ebcd2018-10-10 01:58:15 +0200298 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700299 "4" if args.wide or args.csv else "",
300 inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 32,
301 inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffffffff,
302 tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
303 float(event.span_us) / 1000))
Gerald Combsabdca972018-11-26 23:37:24 -0700304 if args.journal:
305 journal.send(**journal_fields(event, AF_INET))
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700306
307def print_ipv6_event(cpu, data, size):
Xiaozhou Liu51d62d32019-02-15 13:03:05 +0800308 event = b["ipv6_events"].event(data)
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700309 global start_ts
310 if args.time:
311 if args.csv:
312 print("%s," % strftime("%H:%M:%S"), end="")
313 else:
314 print("%-8s " % strftime("%H:%M:%S"), end="")
315 if args.timestamp:
316 if start_ts == 0:
317 start_ts = event.ts_us
318 delta_s = (float(event.ts_us) - start_ts) / 1000000
319 if args.csv:
320 print("%.6f," % delta_s, end="")
321 else:
322 print("%-9.6f " % delta_s, end="")
jeromemarchandb96ebcd2018-10-10 01:58:15 +0200323 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700324 "6" if args.wide or args.csv else "",
325 inet_ntop(AF_INET6, event.saddr), event.ports >> 32,
326 inet_ntop(AF_INET6, event.daddr), event.ports & 0xffffffff,
327 tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
328 float(event.span_us) / 1000))
Gerald Combsabdca972018-11-26 23:37:24 -0700329 if args.journal:
330 journal.send(**journal_fields(event, AF_INET6))
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700331
332# initialize BPF
333b = BPF(text=bpf_text)
334
335# header
336if args.time:
337 if args.csv:
338 print("%s," % ("TIME"), end="")
339 else:
340 print("%-8s " % ("TIME"), end="")
341if args.timestamp:
342 if args.csv:
343 print("%s," % ("TIME(s)"), end="")
344 else:
345 print("%-9s " % ("TIME(s)"), end="")
346print(header_string % ("SKADDR", "C-PID", "C-COMM",
347 "IP" if args.wide or args.csv else "",
348 "LADDR", "LPORT", "RADDR", "RPORT",
349 "OLDSTATE", "NEWSTATE", "MS"))
350
351start_ts = 0
352
353# read events
354b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
355b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
356while 1:
Jerome Marchand51671272018-12-19 01:57:24 +0100357 try:
358 b.perf_buffer_poll()
359 except KeyboardInterrupt:
360 exit()