Alexey Ivanov | cc01a9c | 2019-01-16 09:50:46 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 2 | # -*- coding: utf-8 -*- |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 3 | # @lint-avoid-python-3-compatibility-imports |
| 4 | # |
| 5 | # tcpstates Trace the TCP session state changes with durations. |
| 6 | # For Linux, uses BCC, BPF. Embedded C. |
| 7 | # |
| 8 | # USAGE: tcpstates [-h] [-C] [-S] [interval [count]] |
| 9 | # |
| 10 | # This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16. |
| 11 | # Linux 4.16 also adds more state transitions so that they can be traced. |
| 12 | # |
| 13 | # Copyright 2018 Netflix, Inc. |
| 14 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 15 | # |
| 16 | # 20-Mar-2018 Brendan Gregg Created this. |
| 17 | |
| 18 | from __future__ import print_function |
| 19 | from bcc import BPF |
| 20 | import argparse |
| 21 | from socket import inet_ntop, AF_INET, AF_INET6 |
| 22 | from struct import pack |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 23 | from time import strftime, time |
| 24 | from os import getuid |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 25 | |
| 26 | # arguments |
| 27 | examples = """examples: |
| 28 | ./tcpstates # trace all TCP state changes |
| 29 | ./tcpstates -t # include timestamp column |
| 30 | ./tcpstates -T # include time column (HH:MM:SS) |
| 31 | ./tcpstates -w # wider colums (fit IPv6) |
| 32 | ./tcpstates -stT # csv output, with times & timestamps |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 33 | ./tcpstates -Y # log events to the systemd journal |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 34 | ./tcpstates -L 80 # only trace local port 80 |
| 35 | ./tcpstates -L 80,81 # only trace local ports 80 and 81 |
| 36 | ./tcpstates -D 80 # only trace remote port 80 |
| 37 | """ |
| 38 | parser = argparse.ArgumentParser( |
| 39 | description="Trace TCP session state changes and durations", |
| 40 | formatter_class=argparse.RawDescriptionHelpFormatter, |
| 41 | epilog=examples) |
| 42 | parser.add_argument("-T", "--time", action="store_true", |
| 43 | help="include time column on output (HH:MM:SS)") |
| 44 | parser.add_argument("-t", "--timestamp", action="store_true", |
| 45 | help="include timestamp on output (seconds)") |
| 46 | parser.add_argument("-w", "--wide", action="store_true", |
| 47 | help="wide column output (fits IPv6 addresses)") |
| 48 | parser.add_argument("-s", "--csv", action="store_true", |
| 49 | help="comma separated values output") |
| 50 | parser.add_argument("-L", "--localport", |
| 51 | help="comma-separated list of local ports to trace.") |
| 52 | parser.add_argument("-D", "--remoteport", |
| 53 | help="comma-separated list of remote ports to trace.") |
| 54 | parser.add_argument("--ebpf", action="store_true", |
| 55 | help=argparse.SUPPRESS) |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 56 | parser.add_argument("-Y", "--journal", action="store_true", |
| 57 | help="log session state changes to the systemd journal") |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 58 | args = parser.parse_args() |
| 59 | debug = 0 |
| 60 | |
| 61 | # define BPF program |
| 62 | bpf_text = """ |
| 63 | #include <uapi/linux/ptrace.h> |
| 64 | #define KBUILD_MODNAME "foo" |
| 65 | #include <linux/tcp.h> |
| 66 | #include <net/sock.h> |
| 67 | #include <bcc/proto.h> |
| 68 | |
| 69 | BPF_HASH(last, struct sock *, u64); |
| 70 | |
| 71 | // separate data structs for ipv4 and ipv6 |
| 72 | struct ipv4_data_t { |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 73 | u64 ts_us; |
| 74 | u64 skaddr; |
Joe Yin | 36ce112 | 2018-08-17 06:04:00 +0800 | [diff] [blame] | 75 | u32 saddr; |
| 76 | u32 daddr; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 77 | u64 span_us; |
Brendan Gregg | 2b23de6 | 2018-03-21 15:41:16 -0700 | [diff] [blame] | 78 | u32 pid; |
| 79 | u32 ports; |
| 80 | u32 oldstate; |
| 81 | u32 newstate; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 82 | char task[TASK_COMM_LEN]; |
| 83 | }; |
| 84 | BPF_PERF_OUTPUT(ipv4_events); |
| 85 | |
| 86 | struct ipv6_data_t { |
| 87 | u64 ts_us; |
| 88 | u64 skaddr; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 89 | unsigned __int128 saddr; |
| 90 | unsigned __int128 daddr; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 91 | u64 span_us; |
Brendan Gregg | 2b23de6 | 2018-03-21 15:41:16 -0700 | [diff] [blame] | 92 | u32 pid; |
| 93 | u32 ports; |
| 94 | u32 oldstate; |
| 95 | u32 newstate; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 96 | char task[TASK_COMM_LEN]; |
| 97 | }; |
| 98 | BPF_PERF_OUTPUT(ipv6_events); |
| 99 | |
| 100 | struct id_t { |
| 101 | u32 pid; |
| 102 | char task[TASK_COMM_LEN]; |
| 103 | }; |
| 104 | |
| 105 | TRACEPOINT_PROBE(sock, inet_sock_set_state) |
| 106 | { |
| 107 | if (args->protocol != IPPROTO_TCP) |
| 108 | return 0; |
| 109 | |
| 110 | u32 pid = bpf_get_current_pid_tgid() >> 32; |
| 111 | // sk is used as a UUID |
| 112 | struct sock *sk = (struct sock *)args->skaddr; |
| 113 | |
| 114 | // lport is either used in a filter here, or later |
| 115 | u16 lport = args->sport; |
| 116 | FILTER_LPORT |
| 117 | |
| 118 | // dport is either used in a filter here, or later |
| 119 | u16 dport = args->dport; |
| 120 | FILTER_DPORT |
| 121 | |
| 122 | // calculate delta |
| 123 | u64 *tsp, delta_us; |
| 124 | tsp = last.lookup(&sk); |
| 125 | if (tsp == 0) |
| 126 | delta_us = 0; |
| 127 | else |
| 128 | delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; |
| 129 | |
| 130 | if (args->family == AF_INET) { |
| 131 | struct ipv4_data_t data4 = { |
| 132 | .span_us = delta_us, |
Marko Myllynen | bfbf17e | 2018-09-11 21:49:58 +0300 | [diff] [blame] | 133 | .oldstate = args->oldstate, |
| 134 | .newstate = args->newstate }; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 135 | data4.skaddr = (u64)args->skaddr; |
| 136 | data4.ts_us = bpf_ktime_get_ns() / 1000; |
Joe Yin | 36ce112 | 2018-08-17 06:04:00 +0800 | [diff] [blame] | 137 | __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr)); |
| 138 | __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr)); |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 139 | // a workaround until data4 compiles with separate lport/dport |
| 140 | data4.ports = dport + ((0ULL + lport) << 32); |
| 141 | data4.pid = pid; |
| 142 | |
| 143 | bpf_get_current_comm(&data4.task, sizeof(data4.task)); |
| 144 | ipv4_events.perf_submit(args, &data4, sizeof(data4)); |
| 145 | |
| 146 | } else /* 6 */ { |
| 147 | struct ipv6_data_t data6 = { |
| 148 | .span_us = delta_us, |
Marko Myllynen | bfbf17e | 2018-09-11 21:49:58 +0300 | [diff] [blame] | 149 | .oldstate = args->oldstate, |
| 150 | .newstate = args->newstate }; |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 151 | data6.skaddr = (u64)args->skaddr; |
| 152 | data6.ts_us = bpf_ktime_get_ns() / 1000; |
Joe Yin | 36ce112 | 2018-08-17 06:04:00 +0800 | [diff] [blame] | 153 | __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr)); |
| 154 | __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr)); |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 155 | // a workaround until data6 compiles with separate lport/dport |
| 156 | data6.ports = dport + ((0ULL + lport) << 32); |
| 157 | data6.pid = pid; |
| 158 | bpf_get_current_comm(&data6.task, sizeof(data6.task)); |
| 159 | ipv6_events.perf_submit(args, &data6, sizeof(data6)); |
| 160 | } |
| 161 | |
| 162 | u64 ts = bpf_ktime_get_ns(); |
| 163 | last.update(&sk, &ts); |
| 164 | |
| 165 | return 0; |
| 166 | } |
| 167 | """ |
| 168 | |
| 169 | if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")): |
| 170 | print("ERROR: tracepoint sock:inet_sock_set_state missing " |
| 171 | "(added in Linux 4.16). Exiting") |
| 172 | exit() |
| 173 | |
| 174 | # code substitutions |
| 175 | if args.remoteport: |
| 176 | dports = [int(dport) for dport in args.remoteport.split(',')] |
| 177 | dports_if = ' && '.join(['dport != %d' % dport for dport in dports]) |
| 178 | bpf_text = bpf_text.replace('FILTER_DPORT', |
| 179 | 'if (%s) { last.delete(&sk); return 0; }' % dports_if) |
| 180 | if args.localport: |
| 181 | lports = [int(lport) for lport in args.localport.split(',')] |
| 182 | lports_if = ' && '.join(['lport != %d' % lport for lport in lports]) |
| 183 | bpf_text = bpf_text.replace('FILTER_LPORT', |
| 184 | 'if (%s) { last.delete(&sk); return 0; }' % lports_if) |
| 185 | bpf_text = bpf_text.replace('FILTER_DPORT', '') |
| 186 | bpf_text = bpf_text.replace('FILTER_LPORT', '') |
| 187 | |
| 188 | if debug or args.ebpf: |
| 189 | print(bpf_text) |
| 190 | if args.ebpf: |
| 191 | exit() |
| 192 | |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 193 | # |
| 194 | # Setup output formats |
| 195 | # |
| 196 | # Don't change the default output (next 2 lines): this fits in 80 chars. I |
| 197 | # know it doesn't have NS or UIDs etc. I know. If you really, really, really |
| 198 | # need to add columns, columns that solve real actual problems, I'd start by |
| 199 | # adding an extended mode (-x) to included those columns. |
| 200 | # |
| 201 | header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s" |
| 202 | format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " + |
| 203 | "-> %-11s %.3f") |
| 204 | if args.wide: |
| 205 | header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " + |
| 206 | "-> %-11s %s") |
| 207 | format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s " + |
| 208 | "-> %-11s %.3f") |
| 209 | if args.csv: |
| 210 | header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" |
| 211 | format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f" |
| 212 | |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 213 | if args.journal: |
| 214 | try: |
| 215 | from systemd import journal |
| 216 | except ImportError: |
| 217 | print("ERROR: Journal logging requires the systemd.journal module") |
| 218 | exit(1) |
| 219 | |
| 220 | |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 221 | def tcpstate2str(state): |
| 222 | # from include/net/tcp_states.h: |
| 223 | tcpstate = { |
| 224 | 1: "ESTABLISHED", |
| 225 | 2: "SYN_SENT", |
| 226 | 3: "SYN_RECV", |
| 227 | 4: "FIN_WAIT1", |
| 228 | 5: "FIN_WAIT2", |
| 229 | 6: "TIME_WAIT", |
| 230 | 7: "CLOSE", |
| 231 | 8: "CLOSE_WAIT", |
| 232 | 9: "LAST_ACK", |
| 233 | 10: "LISTEN", |
| 234 | 11: "CLOSING", |
| 235 | 12: "NEW_SYN_RECV", |
| 236 | } |
| 237 | |
| 238 | if state in tcpstate: |
| 239 | return tcpstate[state] |
| 240 | else: |
| 241 | return str(state) |
| 242 | |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 243 | def journal_fields(event, addr_family): |
| 244 | addr_pfx = 'IPV4' |
| 245 | if addr_family == AF_INET6: |
| 246 | addr_pfx = 'IPV6' |
| 247 | |
| 248 | fields = { |
| 249 | # Standard fields described in systemd.journal-fields(7). journal.send |
| 250 | # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're |
| 251 | # root and specify OBJECT_PID, systemd-journald will add other OBJECT_* |
| 252 | # fields for us. |
| 253 | 'SYSLOG_IDENTIFIER': 'tcpstates', |
| 254 | 'PRIORITY': 5, |
| 255 | '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000, |
| 256 | 'OBJECT_PID': str(event.pid), |
| 257 | 'OBJECT_COMM': event.task.decode('utf-8', 'replace'), |
| 258 | # Custom fields, aka "stuff we sort of made up". |
| 259 | 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)), |
| 260 | 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 32), |
| 261 | 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)), |
| 262 | 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffffffff), |
| 263 | 'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate), |
| 264 | 'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate), |
| 265 | 'OBJECT_TCP_SPAN_TIME': str(event.span_us) |
| 266 | } |
| 267 | |
| 268 | msg_format_string = (u"%(OBJECT_COMM)s " + |
| 269 | u"%(OBJECT_" + addr_pfx + "_SOURCE_ADDRESS)s " + |
| 270 | u"%(OBJECT_TCP_SOURCE_PORT)s → " + |
| 271 | u"%(OBJECT_" + addr_pfx + "_DESTINATION_ADDRESS)s " + |
| 272 | u"%(OBJECT_TCP_DESTINATION_PORT)s " + |
| 273 | u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s") |
| 274 | fields['MESSAGE'] = msg_format_string % (fields) |
| 275 | |
| 276 | if getuid() == 0: |
| 277 | del fields['OBJECT_COMM'] # Handled by systemd-journald |
| 278 | |
| 279 | return fields |
| 280 | |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 281 | # process event |
| 282 | def print_ipv4_event(cpu, data, size): |
Xiaozhou Liu | 51d62d3 | 2019-02-15 13:03:05 +0800 | [diff] [blame] | 283 | event = b["ipv4_events"].event(data) |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 284 | global start_ts |
| 285 | if args.time: |
| 286 | if args.csv: |
| 287 | print("%s," % strftime("%H:%M:%S"), end="") |
| 288 | else: |
| 289 | print("%-8s " % strftime("%H:%M:%S"), end="") |
| 290 | if args.timestamp: |
| 291 | if start_ts == 0: |
| 292 | start_ts = event.ts_us |
| 293 | delta_s = (float(event.ts_us) - start_ts) / 1000000 |
| 294 | if args.csv: |
| 295 | print("%.6f," % delta_s, end="") |
| 296 | else: |
| 297 | print("%-9.6f " % delta_s, end="") |
jeromemarchand | b96ebcd | 2018-10-10 01:58:15 +0200 | [diff] [blame] | 298 | print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'), |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 299 | "4" if args.wide or args.csv else "", |
| 300 | inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 32, |
| 301 | inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffffffff, |
| 302 | tcpstate2str(event.oldstate), tcpstate2str(event.newstate), |
| 303 | float(event.span_us) / 1000)) |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 304 | if args.journal: |
| 305 | journal.send(**journal_fields(event, AF_INET)) |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 306 | |
| 307 | def print_ipv6_event(cpu, data, size): |
Xiaozhou Liu | 51d62d3 | 2019-02-15 13:03:05 +0800 | [diff] [blame] | 308 | event = b["ipv6_events"].event(data) |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 309 | global start_ts |
| 310 | if args.time: |
| 311 | if args.csv: |
| 312 | print("%s," % strftime("%H:%M:%S"), end="") |
| 313 | else: |
| 314 | print("%-8s " % strftime("%H:%M:%S"), end="") |
| 315 | if args.timestamp: |
| 316 | if start_ts == 0: |
| 317 | start_ts = event.ts_us |
| 318 | delta_s = (float(event.ts_us) - start_ts) / 1000000 |
| 319 | if args.csv: |
| 320 | print("%.6f," % delta_s, end="") |
| 321 | else: |
| 322 | print("%-9.6f " % delta_s, end="") |
jeromemarchand | b96ebcd | 2018-10-10 01:58:15 +0200 | [diff] [blame] | 323 | print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'), |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 324 | "6" if args.wide or args.csv else "", |
| 325 | inet_ntop(AF_INET6, event.saddr), event.ports >> 32, |
| 326 | inet_ntop(AF_INET6, event.daddr), event.ports & 0xffffffff, |
| 327 | tcpstate2str(event.oldstate), tcpstate2str(event.newstate), |
| 328 | float(event.span_us) / 1000)) |
Gerald Combs | abdca97 | 2018-11-26 23:37:24 -0700 | [diff] [blame] | 329 | if args.journal: |
| 330 | journal.send(**journal_fields(event, AF_INET6)) |
Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 331 | |
| 332 | # initialize BPF |
| 333 | b = BPF(text=bpf_text) |
| 334 | |
| 335 | # header |
| 336 | if args.time: |
| 337 | if args.csv: |
| 338 | print("%s," % ("TIME"), end="") |
| 339 | else: |
| 340 | print("%-8s " % ("TIME"), end="") |
| 341 | if args.timestamp: |
| 342 | if args.csv: |
| 343 | print("%s," % ("TIME(s)"), end="") |
| 344 | else: |
| 345 | print("%-9s " % ("TIME(s)"), end="") |
| 346 | print(header_string % ("SKADDR", "C-PID", "C-COMM", |
| 347 | "IP" if args.wide or args.csv else "", |
| 348 | "LADDR", "LPORT", "RADDR", "RPORT", |
| 349 | "OLDSTATE", "NEWSTATE", "MS")) |
| 350 | |
| 351 | start_ts = 0 |
| 352 | |
| 353 | # read events |
| 354 | b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64) |
| 355 | b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64) |
| 356 | while 1: |
Jerome Marchand | 5167127 | 2018-12-19 01:57:24 +0100 | [diff] [blame] | 357 | try: |
| 358 | b.perf_buffer_poll() |
| 359 | except KeyboardInterrupt: |
| 360 | exit() |