Brendan Gregg | bbd9acd | 2018-03-20 18:35:12 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
| 4 | # tcpstates Trace the TCP session state changes with durations. |
| 5 | # For Linux, uses BCC, BPF. Embedded C. |
| 6 | # |
| 7 | # USAGE: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT] |
| 8 | # |
| 9 | # This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16. |
| 10 | # Linux 4.16 also adds more state transitions so that they can be traced. |
| 11 | # |
| 12 | # Copyright 2018 Netflix, Inc. |
| 13 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 14 | # |
| 15 | # 20-Mar-2018 Brendan Gregg Created this. |
| 16 | |
| 17 | from __future__ import print_function |
| 18 | from bcc import BPF |
| 19 | import argparse |
| 20 | from socket import inet_ntop, AF_INET, AF_INET6 |
| 21 | from struct import pack |
| 22 | import ctypes as ct |
| 23 | from time import strftime |
| 24 | |
| 25 | # arguments |
# Epilog shown at the end of --help output.
examples = """examples:
./tcpstates # trace all TCP state changes
./tcpstates -t # include timestamp column
./tcpstates -T # include time column (HH:MM:SS)
./tcpstates -w # wider colums (fit IPv6)
./tcpstates -stT # csv output, with times & timestamps
./tcpstates -L 80 # only trace local port 80
./tcpstates -L 80,81 # only trace local ports 80 and 81
./tcpstates -D 80 # only trace remote port 80
"""
parser = argparse.ArgumentParser(
    description="Trace TCP session state changes and durations",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

# Boolean switches, registered in the order they appear in --help.
for short_flag, long_flag, help_text in (
        ("-T", "--time", "include time column on output (HH:MM:SS)"),
        ("-t", "--timestamp", "include timestamp on output (seconds)"),
        ("-w", "--wide", "wide column output (fits IPv6 addresses)"),
        ("-s", "--csv", "comma separated values output")):
    parser.add_argument(short_flag, long_flag, action="store_true",
        help=help_text)

# Options taking a comma-separated port list (kept as a raw string here).
for short_flag, long_flag, help_text in (
        ("-L", "--localport",
            "comma-separated list of local ports to trace."),
        ("-D", "--remoteport",
            "comma-separated list of remote ports to trace.")):
    parser.add_argument(short_flag, long_flag, help=help_text)

# Hidden switch: dump the generated BPF program and exit.
parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS)

args = parser.parse_args()
debug = 0
| 56 | |
| 57 | # define BPF program |
# The C program below is compiled by BCC. FILTER_LPORT / FILTER_DPORT are
# placeholders that are textually replaced further down, before compilation.
bpf_text = """
#include <uapi/linux/ptrace.h>
#define KBUILD_MODNAME "foo"
#include <linux/tcp.h>
#include <net/sock.h>
#include <bcc/proto.h>

// time (ns) of the previous state change, keyed by socket address
BPF_HASH(last, struct sock *, u64);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u64 skaddr;
    u64 saddr;
    u64 daddr;
    u64 span_us;
    u32 pid;
    u32 ports;      // (lport << 16) | dport
    u32 oldstate;
    u32 newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u64 skaddr;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 span_us;
    u32 pid;
    u32 ports;      // (lport << 16) | dport
    u32 oldstate;
    u32 newstate;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);

TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    u32 pid = bpf_get_current_pid_tgid() >> 32;
    // sk is used as a UUID
    struct sock *sk = (struct sock *)args->skaddr;

    // lport is either used in a filter here, or later
    u16 lport = args->sport;
    FILTER_LPORT

    // dport is either used in a filter here, or later
    u16 dport = args->dport;
    FILTER_DPORT

    // calculate delta
    u64 *tsp, delta_us;
    tsp = last.lookup(&sk);
    if (tsp == 0)
        delta_us = 0;
    else
        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;

    if (args->family == AF_INET) {
        struct ipv4_data_t data4 = {
            .span_us = delta_us,
            .oldstate = args->oldstate, .newstate = args->newstate};
        data4.skaddr = (u64)args->skaddr;
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr);
        bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr);
        // a workaround until data4 compiles with separate lport/dport:
        // both 16-bit ports are packed into the single u32 field
        data4.ports = ((u32)lport << 16) | dport;
        data4.pid = pid;

        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));

    } else /* 6 */ {
        struct ipv6_data_t data6 = {
            .span_us = delta_us,
            .oldstate = args->oldstate, .newstate = args->newstate};
        data6.skaddr = (u64)args->skaddr;
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->daddr_v6);
        // a workaround until data6 compiles with separate lport/dport:
        // both 16-bit ports are packed into the single u32 field
        data6.ports = ((u32)lport << 16) | dport;
        data6.pid = pid;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }

    if (args->newstate == TCP_CLOSE) {
        // session is over: drop the entry so the map does not fill up and
        // a recycled socket address does not inherit a stale timestamp
        last.delete(&sk);
    } else {
        u64 ts = bpf_ktime_get_ns();
        last.update(&sk, &ts);
    }

    return 0;
}
"""

if not BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    print("ERROR: tracepoint sock:inet_sock_set_state missing "
        "(added in Linux 4.16). Exiting")
    exit()

# code substitutions
def _port_filter(c_var, port_list):
    """Build the C statement that drops events whose port is not wanted.

    c_var is the C variable to test ("lport" or "dport"); port_list is the
    comma-separated string given on the command line. Raises ValueError if
    an entry is not an integer.
    """
    ports = [int(port) for port in port_list.split(',')]
    condition = ' && '.join('%s != %d' % (c_var, port) for port in ports)
    return 'if (%s) { last.delete(&sk); return 0; }' % condition

# An unused placeholder is replaced with nothing, i.e. no filtering.
bpf_text = bpf_text.replace('FILTER_DPORT',
    _port_filter('dport', args.remoteport) if args.remoteport else '')
bpf_text = bpf_text.replace('FILTER_LPORT',
    _port_filter('lport', args.localport) if args.localport else '')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# event data: ctypes mirrors of struct ipv4_data_t / struct ipv6_data_t in
# bpf_text; field order and widths must match the C definitions.
TASK_COMM_LEN = 16      # linux/sched.h

class Data_ipv4(ct.Structure):
    _fields_ = [
        ("ts_us", ct.c_ulonglong),
        ("skaddr", ct.c_ulonglong),
        ("saddr", ct.c_ulonglong),
        ("daddr", ct.c_ulonglong),
        ("span_us", ct.c_ulonglong),
        ("pid", ct.c_uint),
        ("ports", ct.c_uint),       # (lport << 16) | dport
        ("oldstate", ct.c_uint),
        ("newstate", ct.c_uint),
        ("task", ct.c_char * TASK_COMM_LEN)
    ]

class Data_ipv6(ct.Structure):
    _fields_ = [
        ("ts_us", ct.c_ulonglong),
        ("skaddr", ct.c_ulonglong),
        ("saddr", (ct.c_ulonglong * 2)),
        ("daddr", (ct.c_ulonglong * 2)),
        ("span_us", ct.c_ulonglong),
        ("pid", ct.c_uint),
        ("ports", ct.c_uint),       # (lport << 16) | dport
        ("oldstate", ct.c_uint),
        ("newstate", ct.c_uint),
        ("task", ct.c_char * TASK_COMM_LEN)
    ]

#
# Setup output formats
#
# Don't change the default output (next 2 lines): this fits in 80 chars. I
# know it doesn't have NS or UIDs etc. I know. If you really, really, really
# need to add columns, columns that solve real actual problems, I'd start by
# adding an extended mode (-x) to include those columns.
#
header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
    "-> %-11s %.3f")
if args.wide:
    header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " +
        "-> %-11s %s")
    format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5d %-26s %-5d %-11s " +
        "-> %-11s %.3f")
if args.csv:
    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
    format_string = "%x,%d,%s,%s,%s,%d,%s,%d,%s,%s,%.3f"

# from include/net/tcp_states.h:
TCP_STATE_NAMES = {
    1: "ESTABLISHED",
    2: "SYN_SENT",
    3: "SYN_RECV",
    4: "FIN_WAIT1",
    5: "FIN_WAIT2",
    6: "TIME_WAIT",
    7: "CLOSE",
    8: "CLOSE_WAIT",
    9: "LAST_ACK",
    10: "LISTEN",
    11: "CLOSING",
    12: "NEW_SYN_RECV",
}

def tcpstate2str(state):
    """Return the name of a TCP state number, or the number as a string
    when it is not a known state."""
    return TCP_STATE_NAMES.get(state, str(state))

# process event
def _print_event(event, ip_version, laddr, raddr):
    """Print one output row for a decoded event.

    event is a Data_ipv4 or Data_ipv6 instance, ip_version is "4" or "6",
    and laddr/raddr are the already formatted local/remote addresses.
    """
    global start_ts
    if args.time:
        print(("%s," if args.csv else "%-8s ") % strftime("%H:%M:%S"), end="")
    if args.timestamp:
        # timestamps are reported relative to the first event printed
        if start_ts == 0:
            start_ts = event.ts_us
        delta_s = (float(event.ts_us) - start_ts) / 1000000
        print(("%.6f," if args.csv else "%-9.6f ") % delta_s, end="")
    print(format_string % (event.skaddr, event.pid,
        # comm is arbitrary bytes; never let a bad name abort the trace
        event.task.decode('utf-8', 'replace'),
        ip_version if args.wide or args.csv else "",
        laddr, event.ports >> 16,
        raddr, event.ports & 0xffff,
        tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
        float(event.span_us) / 1000))

def print_ipv4_event(cpu, data, size):
    """Perf buffer callback for ipv4_events: decode and print one event."""
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    _print_event(event, "4",
        inet_ntop(AF_INET, pack("I", event.saddr)),
        inet_ntop(AF_INET, pack("I", event.daddr)))

def print_ipv6_event(cpu, data, size):
    """Perf buffer callback for ipv6_events: decode and print one event."""
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    _print_event(event, "6",
        inet_ntop(AF_INET6, event.saddr),
        inet_ntop(AF_INET6, event.daddr))
| 306 | |
| 307 | # initialize BPF |
b = BPF(text=bpf_text)

# header: optional time columns first, then the fixed column titles
if args.time:
    print(("%s," if args.csv else "%-8s ") % "TIME", end="")
if args.timestamp:
    print(("%s," if args.csv else "%-9s ") % "TIME(s)", end="")
print(header_string % ("SKADDR", "C-PID", "C-COMM",
    "IP" if args.wide or args.csv else "",
    "LADDR", "LPORT", "RADDR", "RPORT",
    "OLDSTATE", "NEWSTATE", "MS"))

# ts_us of the first printed event; 0 means "not seen yet". Set by the
# event callbacks when -t is used.
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing: exit without a traceback
        exit()