blob: 736de97aeea956133fbb24d606b2d63f8ba8a768 [file] [log] [blame]
Alexey Ivanov777e8022019-01-03 13:46:38 -08001#!/usr/bin/env python
Gerald Combsabdca972018-11-26 23:37:24 -07002# -*- coding: utf-8 -*-
Brendan Greggbbd9acd2018-03-20 18:35:12 -07003# @lint-avoid-python-3-compatibility-imports
4#
5# tcpstates Trace the TCP session state changes with durations.
6# For Linux, uses BCC, BPF. Embedded C.
7#
8# USAGE: tcpstates [-h] [-C] [-S] [interval [count]]
9#
10# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
11# Linux 4.16 also adds more state transitions so that they can be traced.
12#
13# Copyright 2018 Netflix, Inc.
14# Licensed under the Apache License, Version 2.0 (the "License")
15#
16# 20-Mar-2018 Brendan Gregg Created this.
17
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from socket import inet_ntop, AF_INET, AF_INET6
22from struct import pack
23import ctypes as ct
Gerald Combsabdca972018-11-26 23:37:24 -070024from time import strftime, time
25from os import getuid
Brendan Greggbbd9acd2018-03-20 18:35:12 -070026
27# arguments
28examples = """examples:
29 ./tcpstates # trace all TCP state changes
30 ./tcpstates -t # include timestamp column
31 ./tcpstates -T # include time column (HH:MM:SS)
32 ./tcpstates -w # wider colums (fit IPv6)
33 ./tcpstates -stT # csv output, with times & timestamps
Gerald Combsabdca972018-11-26 23:37:24 -070034 ./tcpstates -Y # log events to the systemd journal
Brendan Greggbbd9acd2018-03-20 18:35:12 -070035 ./tcpstates -L 80 # only trace local port 80
36 ./tcpstates -L 80,81 # only trace local ports 80 and 81
37 ./tcpstates -D 80 # only trace remote port 80
38"""
39parser = argparse.ArgumentParser(
40 description="Trace TCP session state changes and durations",
41 formatter_class=argparse.RawDescriptionHelpFormatter,
42 epilog=examples)
43parser.add_argument("-T", "--time", action="store_true",
44 help="include time column on output (HH:MM:SS)")
45parser.add_argument("-t", "--timestamp", action="store_true",
46 help="include timestamp on output (seconds)")
47parser.add_argument("-w", "--wide", action="store_true",
48 help="wide column output (fits IPv6 addresses)")
49parser.add_argument("-s", "--csv", action="store_true",
50 help="comma separated values output")
51parser.add_argument("-L", "--localport",
52 help="comma-separated list of local ports to trace.")
53parser.add_argument("-D", "--remoteport",
54 help="comma-separated list of remote ports to trace.")
55parser.add_argument("--ebpf", action="store_true",
56 help=argparse.SUPPRESS)
Gerald Combsabdca972018-11-26 23:37:24 -070057parser.add_argument("-Y", "--journal", action="store_true",
58 help="log session state changes to the systemd journal")
Brendan Greggbbd9acd2018-03-20 18:35:12 -070059args = parser.parse_args()
60debug = 0
61
62# define BPF program
63bpf_text = """
64#include <uapi/linux/ptrace.h>
65#define KBUILD_MODNAME "foo"
66#include <linux/tcp.h>
67#include <net/sock.h>
68#include <bcc/proto.h>
69
70BPF_HASH(last, struct sock *, u64);
71
72// separate data structs for ipv4 and ipv6
73struct ipv4_data_t {
Brendan Greggbbd9acd2018-03-20 18:35:12 -070074 u64 ts_us;
75 u64 skaddr;
Joe Yin36ce1122018-08-17 06:04:00 +080076 u32 saddr;
77 u32 daddr;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070078 u64 span_us;
Brendan Gregg2b23de62018-03-21 15:41:16 -070079 u32 pid;
80 u32 ports;
81 u32 oldstate;
82 u32 newstate;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070083 char task[TASK_COMM_LEN];
84};
85BPF_PERF_OUTPUT(ipv4_events);
86
87struct ipv6_data_t {
88 u64 ts_us;
89 u64 skaddr;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070090 unsigned __int128 saddr;
91 unsigned __int128 daddr;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070092 u64 span_us;
Brendan Gregg2b23de62018-03-21 15:41:16 -070093 u32 pid;
94 u32 ports;
95 u32 oldstate;
96 u32 newstate;
Brendan Greggbbd9acd2018-03-20 18:35:12 -070097 char task[TASK_COMM_LEN];
98};
99BPF_PERF_OUTPUT(ipv6_events);
100
101struct id_t {
102 u32 pid;
103 char task[TASK_COMM_LEN];
104};
105
106TRACEPOINT_PROBE(sock, inet_sock_set_state)
107{
108 if (args->protocol != IPPROTO_TCP)
109 return 0;
110
111 u32 pid = bpf_get_current_pid_tgid() >> 32;
112 // sk is used as a UUID
113 struct sock *sk = (struct sock *)args->skaddr;
114
115 // lport is either used in a filter here, or later
116 u16 lport = args->sport;
117 FILTER_LPORT
118
119 // dport is either used in a filter here, or later
120 u16 dport = args->dport;
121 FILTER_DPORT
122
123 // calculate delta
124 u64 *tsp, delta_us;
125 tsp = last.lookup(&sk);
126 if (tsp == 0)
127 delta_us = 0;
128 else
129 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
130
131 if (args->family == AF_INET) {
132 struct ipv4_data_t data4 = {
133 .span_us = delta_us,
Marko Myllynenbfbf17e2018-09-11 21:49:58 +0300134 .oldstate = args->oldstate,
135 .newstate = args->newstate };
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700136 data4.skaddr = (u64)args->skaddr;
137 data4.ts_us = bpf_ktime_get_ns() / 1000;
Joe Yin36ce1122018-08-17 06:04:00 +0800138 __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
139 __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700140 // a workaround until data4 compiles with separate lport/dport
141 data4.ports = dport + ((0ULL + lport) << 32);
142 data4.pid = pid;
143
144 bpf_get_current_comm(&data4.task, sizeof(data4.task));
145 ipv4_events.perf_submit(args, &data4, sizeof(data4));
146
147 } else /* 6 */ {
148 struct ipv6_data_t data6 = {
149 .span_us = delta_us,
Marko Myllynenbfbf17e2018-09-11 21:49:58 +0300150 .oldstate = args->oldstate,
151 .newstate = args->newstate };
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700152 data6.skaddr = (u64)args->skaddr;
153 data6.ts_us = bpf_ktime_get_ns() / 1000;
Joe Yin36ce1122018-08-17 06:04:00 +0800154 __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
155 __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700156 // a workaround until data6 compiles with separate lport/dport
157 data6.ports = dport + ((0ULL + lport) << 32);
158 data6.pid = pid;
159 bpf_get_current_comm(&data6.task, sizeof(data6.task));
160 ipv6_events.perf_submit(args, &data6, sizeof(data6));
161 }
162
163 u64 ts = bpf_ktime_get_ns();
164 last.update(&sk, &ts);
165
166 return 0;
167}
168"""
169
170if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")):
171 print("ERROR: tracepoint sock:inet_sock_set_state missing "
172 "(added in Linux 4.16). Exiting")
173 exit()
174
175# code substitutions
176if args.remoteport:
177 dports = [int(dport) for dport in args.remoteport.split(',')]
178 dports_if = ' && '.join(['dport != %d' % dport for dport in dports])
179 bpf_text = bpf_text.replace('FILTER_DPORT',
180 'if (%s) { last.delete(&sk); return 0; }' % dports_if)
181if args.localport:
182 lports = [int(lport) for lport in args.localport.split(',')]
183 lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
184 bpf_text = bpf_text.replace('FILTER_LPORT',
185 'if (%s) { last.delete(&sk); return 0; }' % lports_if)
186bpf_text = bpf_text.replace('FILTER_DPORT', '')
187bpf_text = bpf_text.replace('FILTER_LPORT', '')
188
189if debug or args.ebpf:
190 print(bpf_text)
191 if args.ebpf:
192 exit()
193
194# event data
195TASK_COMM_LEN = 16 # linux/sched.h
196
197class Data_ipv4(ct.Structure):
198 _fields_ = [
199 ("ts_us", ct.c_ulonglong),
200 ("skaddr", ct.c_ulonglong),
Joe Yin36ce1122018-08-17 06:04:00 +0800201 ("saddr", ct.c_uint),
202 ("daddr", ct.c_uint),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700203 ("span_us", ct.c_ulonglong),
Brendan Gregg2b23de62018-03-21 15:41:16 -0700204 ("pid", ct.c_uint),
205 ("ports", ct.c_uint),
206 ("oldstate", ct.c_uint),
207 ("newstate", ct.c_uint),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700208 ("task", ct.c_char * TASK_COMM_LEN)
209 ]
210
211class Data_ipv6(ct.Structure):
212 _fields_ = [
213 ("ts_us", ct.c_ulonglong),
214 ("skaddr", ct.c_ulonglong),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700215 ("saddr", (ct.c_ulonglong * 2)),
216 ("daddr", (ct.c_ulonglong * 2)),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700217 ("span_us", ct.c_ulonglong),
Brendan Gregg2b23de62018-03-21 15:41:16 -0700218 ("pid", ct.c_uint),
219 ("ports", ct.c_uint),
220 ("oldstate", ct.c_uint),
221 ("newstate", ct.c_uint),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700222 ("task", ct.c_char * TASK_COMM_LEN)
223 ]
224
225#
226# Setup output formats
227#
228# Don't change the default output (next 2 lines): this fits in 80 chars. I
229# know it doesn't have NS or UIDs etc. I know. If you really, really, really
230# need to add columns, columns that solve real actual problems, I'd start by
231# adding an extended mode (-x) to included those columns.
232#
233header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
234format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
235 "-> %-11s %.3f")
236if args.wide:
237 header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " +
238 "-> %-11s %s")
239 format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s " +
240 "-> %-11s %.3f")
241if args.csv:
242 header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
243 format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f"
244
Gerald Combsabdca972018-11-26 23:37:24 -0700245if args.journal:
246 try:
247 from systemd import journal
248 except ImportError:
249 print("ERROR: Journal logging requires the systemd.journal module")
250 exit(1)
251
252
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700253def tcpstate2str(state):
254 # from include/net/tcp_states.h:
255 tcpstate = {
256 1: "ESTABLISHED",
257 2: "SYN_SENT",
258 3: "SYN_RECV",
259 4: "FIN_WAIT1",
260 5: "FIN_WAIT2",
261 6: "TIME_WAIT",
262 7: "CLOSE",
263 8: "CLOSE_WAIT",
264 9: "LAST_ACK",
265 10: "LISTEN",
266 11: "CLOSING",
267 12: "NEW_SYN_RECV",
268 }
269
270 if state in tcpstate:
271 return tcpstate[state]
272 else:
273 return str(state)
274
Gerald Combsabdca972018-11-26 23:37:24 -0700275def journal_fields(event, addr_family):
276 addr_pfx = 'IPV4'
277 if addr_family == AF_INET6:
278 addr_pfx = 'IPV6'
279
280 fields = {
281 # Standard fields described in systemd.journal-fields(7). journal.send
282 # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're
283 # root and specify OBJECT_PID, systemd-journald will add other OBJECT_*
284 # fields for us.
285 'SYSLOG_IDENTIFIER': 'tcpstates',
286 'PRIORITY': 5,
287 '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000,
288 'OBJECT_PID': str(event.pid),
289 'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
290 # Custom fields, aka "stuff we sort of made up".
291 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
292 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 32),
293 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
294 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffffffff),
295 'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
296 'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
297 'OBJECT_TCP_SPAN_TIME': str(event.span_us)
298 }
299
300 msg_format_string = (u"%(OBJECT_COMM)s " +
301 u"%(OBJECT_" + addr_pfx + "_SOURCE_ADDRESS)s " +
302 u"%(OBJECT_TCP_SOURCE_PORT)s → " +
303 u"%(OBJECT_" + addr_pfx + "_DESTINATION_ADDRESS)s " +
304 u"%(OBJECT_TCP_DESTINATION_PORT)s " +
305 u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s")
306 fields['MESSAGE'] = msg_format_string % (fields)
307
308 if getuid() == 0:
309 del fields['OBJECT_COMM'] # Handled by systemd-journald
310
311 return fields
312
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700313# process event
314def print_ipv4_event(cpu, data, size):
315 event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
316 global start_ts
317 if args.time:
318 if args.csv:
319 print("%s," % strftime("%H:%M:%S"), end="")
320 else:
321 print("%-8s " % strftime("%H:%M:%S"), end="")
322 if args.timestamp:
323 if start_ts == 0:
324 start_ts = event.ts_us
325 delta_s = (float(event.ts_us) - start_ts) / 1000000
326 if args.csv:
327 print("%.6f," % delta_s, end="")
328 else:
329 print("%-9.6f " % delta_s, end="")
jeromemarchandb96ebcd2018-10-10 01:58:15 +0200330 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700331 "4" if args.wide or args.csv else "",
332 inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 32,
333 inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffffffff,
334 tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
335 float(event.span_us) / 1000))
Gerald Combsabdca972018-11-26 23:37:24 -0700336 if args.journal:
337 journal.send(**journal_fields(event, AF_INET))
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700338
339def print_ipv6_event(cpu, data, size):
340 event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
341 global start_ts
342 if args.time:
343 if args.csv:
344 print("%s," % strftime("%H:%M:%S"), end="")
345 else:
346 print("%-8s " % strftime("%H:%M:%S"), end="")
347 if args.timestamp:
348 if start_ts == 0:
349 start_ts = event.ts_us
350 delta_s = (float(event.ts_us) - start_ts) / 1000000
351 if args.csv:
352 print("%.6f," % delta_s, end="")
353 else:
354 print("%-9.6f " % delta_s, end="")
jeromemarchandb96ebcd2018-10-10 01:58:15 +0200355 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700356 "6" if args.wide or args.csv else "",
357 inet_ntop(AF_INET6, event.saddr), event.ports >> 32,
358 inet_ntop(AF_INET6, event.daddr), event.ports & 0xffffffff,
359 tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
360 float(event.span_us) / 1000))
Gerald Combsabdca972018-11-26 23:37:24 -0700361 if args.journal:
362 journal.send(**journal_fields(event, AF_INET6))
Brendan Greggbbd9acd2018-03-20 18:35:12 -0700363
364# initialize BPF
365b = BPF(text=bpf_text)
366
367# header
368if args.time:
369 if args.csv:
370 print("%s," % ("TIME"), end="")
371 else:
372 print("%-8s " % ("TIME"), end="")
373if args.timestamp:
374 if args.csv:
375 print("%s," % ("TIME(s)"), end="")
376 else:
377 print("%-9s " % ("TIME(s)"), end="")
378print(header_string % ("SKADDR", "C-PID", "C-COMM",
379 "IP" if args.wide or args.csv else "",
380 "LADDR", "LPORT", "RADDR", "RPORT",
381 "OLDSTATE", "NEWSTATE", "MS"))
382
383start_ts = 0
384
385# read events
386b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
387b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
388while 1:
Jerome Marchand51671272018-12-19 01:57:24 +0100389 try:
390 b.perf_buffer_poll()
391 except KeyboardInterrupt:
392 exit()