| #!/usr/bin/python |
| # |
| # tcpv4tracer Trace TCP connections. |
| # For Linux, uses BCC, eBPF. Embedded C. |
| # |
| # USAGE: tcpv4tracer [-h] [-t] [-v] [-p PID] [-N NETNS] |
| # |
| # You should generally try to avoid writing long scripts that measure multiple |
| # functions and walk multiple kernel structures, as they will be a burden to |
| # maintain as the kernel changes. |
| # The following code should be replaced, and simplified, when static TCP probes |
| # exist. |
| # |
| # Copyright 2017 Kinvolk GmbH |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License") |
| from __future__ import print_function |
| from bcc import BPF |
| |
| import argparse as ap |
| import ctypes |
| from socket import inet_ntop, AF_INET, AF_INET6 |
| from struct import pack |
| |
# Command-line interface.  All options are optional; the PID and network
# namespace filters default to 0, which the rest of the script treats as
# "no filter".
parser = ap.ArgumentParser(
    description="Trace TCP connections",
    formatter_class=ap.RawDescriptionHelpFormatter)
parser.add_argument(
    "-t", "--timestamp",
    action="store_true",
    help="include timestamp on output")
parser.add_argument(
    "-p", "--pid",
    type=int,
    default=0,
    help="trace this PID only")
parser.add_argument(
    "-N", "--netns",
    type=int,
    default=0,
    help="trace this Network Namespace only")
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="include Network Namespace in the output")
args = parser.parse_args()
| |
# Embedded C (BPF) program that BCC compiles at start-up.  The ##FILTER_PID##
# and ##FILTER_NETNS## markers are not C: before compilation they are replaced
# from Python with an optional early-return guard (or with an empty string).
bpf_text = """
#include <uapi/linux/ptrace.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#include <net/sock.h>
#pragma clang diagnostic pop
#include <net/inet_sock.h>
#include <net/net_namespace.h>
#include <bcc/proto.h>

// Values of the "type" field in the events sent to user space.
#define TCP_EVENT_TYPE_CONNECT 1
#define TCP_EVENT_TYPE_ACCEPT 2
#define TCP_EVENT_TYPE_CLOSE 3

// Event record for an IPv4 connection. User space decodes it with a matching
// ctypes structure, so field order and types must stay in sync with it.
struct tcp_ipv4_event_t {
    u64 ts_ns;
    u32 type;
    u32 pid;
    char comm[TASK_COMM_LEN];
    u8 ip;
    u32 saddr;
    u32 daddr;
    u16 sport;
    u16 dport;
    u32 netns;
};
BPF_PERF_OUTPUT(tcp_ipv4_event);

// Event record for an IPv6 connection; same layout rules as the IPv4 record.
struct tcp_ipv6_event_t {
    u64 ts_ns;
    u32 type;
    u32 pid;
    char comm[TASK_COMM_LEN];
    u8 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 sport;
    u16 dport;
    u32 netns;
};
BPF_PERF_OUTPUT(tcp_ipv6_event);

// tcp_set_state doesn't run in the context of the process that initiated the
// connection so we need to store a map TUPLE -> PID to send the right PID on
// the event
struct ipv4_tuple_t {
    u32 saddr;
    u32 daddr;
    u16 sport;
    u16 dport;
    u32 netns;
};

struct ipv6_tuple_t {
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 sport;
    u16 dport;
    u32 netns;
};

// Value stored per tuple: the full bpf_get_current_pid_tgid() result (the
// upper 32 bits are what gets reported as the PID) and the command name.
struct pid_comm_t {
    u64 pid;
    char comm[TASK_COMM_LEN];
};

BPF_HASH(tuplepid_ipv4, struct ipv4_tuple_t, struct pid_comm_t);
BPF_HASH(tuplepid_ipv6, struct ipv6_tuple_t, struct pid_comm_t);

// Socket pointer saved at connect entry, keyed by pid_tgid, for the matching
// return probe.
BPF_HASH(connectsock, u64, struct sock *);

// Fill *tuple from the socket. Returns 1 when the tuple is usable and 0 when
// it must be ignored (an address or port is still 0, or the optional network
// namespace filter rejected the socket).
static int read_ipv4_tuple(struct ipv4_tuple_t *tuple, struct sock *skp)
{
    u32 net_ns_inum = 0;
    u32 saddr = skp->__sk_common.skc_rcv_saddr;
    u32 daddr = skp->__sk_common.skc_daddr;
    struct inet_sock *sockp = (struct inet_sock *)skp;
    u16 sport = sockp->inet_sport;
    u16 dport = skp->__sk_common.skc_dport;
#ifdef CONFIG_NET_NS
    possible_net_t skc_net = skp->__sk_common.skc_net;
    bpf_probe_read(&net_ns_inum, sizeof(net_ns_inum), &skc_net.net->ns.inum);
#endif

    ##FILTER_NETNS##

    tuple->saddr = saddr;
    tuple->daddr = daddr;
    tuple->sport = sport;
    tuple->dport = dport;
    tuple->netns = net_ns_inum;

    // if addresses or ports are 0, ignore
    if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) {
        return 0;
    }

    return 1;
}

// IPv6 counterpart of read_ipv4_tuple; same return convention.
static int read_ipv6_tuple(struct ipv6_tuple_t *tuple, struct sock *skp)
{
    u32 net_ns_inum = 0;
    unsigned __int128 saddr = 0, daddr = 0;
    struct inet_sock *sockp = (struct inet_sock *)skp;
    u16 sport = sockp->inet_sport;
    u16 dport = skp->__sk_common.skc_dport;
#ifdef CONFIG_NET_NS
    possible_net_t skc_net = skp->__sk_common.skc_net;
    bpf_probe_read(&net_ns_inum, sizeof(net_ns_inum), &skc_net.net->ns.inum);
#endif
    bpf_probe_read(&saddr, sizeof(saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
    bpf_probe_read(&daddr, sizeof(daddr),
                   skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);

    ##FILTER_NETNS##

    tuple->saddr = saddr;
    tuple->daddr = daddr;
    tuple->sport = sport;
    tuple->dport = dport;
    tuple->netns = net_ns_inum;

    // if addresses or ports are 0, ignore
    if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) {
        return 0;
    }

    return 1;
}

// True when the socket's address family equals expected_family.
static bool check_family(struct sock *sk, u16 expected_family) {
    u16 family = sk->__sk_common.skc_family;
    return family == expected_family;
}

// Entry probe for tcp_v4_connect: remember the socket for the return probe.
int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk)
{
    u64 pid = bpf_get_current_pid_tgid();

    ##FILTER_PID##

    // stash the sock ptr for lookup on return
    connectsock.update(&pid, &sk);

    return 0;
}

// Return probe for tcp_v4_connect: on success, record which task owns the
// connection tuple so the later state change can be attributed to it.
int trace_connect_v4_return(struct pt_regs *ctx)
{
    int ret = PT_REGS_RC(ctx);
    u64 pid = bpf_get_current_pid_tgid();

    struct sock **skpp;
    skpp = connectsock.lookup(&pid);
    if (skpp == 0) {
        return 0;       // missed entry
    }

    connectsock.delete(&pid);

    if (ret != 0) {
        // failed to send SYN packet, may not have populated
        // socket __sk_common.{skc_rcv_saddr, ...}
        return 0;
    }

    // pull in details
    struct sock *skp = *skpp;
    struct ipv4_tuple_t t = { };
    if (!read_ipv4_tuple(&t, skp)) {
        return 0;
    }

    struct pid_comm_t p = { };
    p.pid = pid;
    bpf_get_current_comm(&p.comm, sizeof(p.comm));

    tuplepid_ipv4.update(&t, &p);

    return 0;
}

// Entry probe for tcp_v6_connect: remember the socket for the return probe.
int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk)
{
    u64 pid = bpf_get_current_pid_tgid();

    ##FILTER_PID##

    // stash the sock ptr for lookup on return
    connectsock.update(&pid, &sk);

    return 0;
}

// Return probe for tcp_v6_connect: IPv6 counterpart of the v4 return probe.
int trace_connect_v6_return(struct pt_regs *ctx)
{
    int ret = PT_REGS_RC(ctx);
    u64 pid = bpf_get_current_pid_tgid();

    struct sock **skpp;
    skpp = connectsock.lookup(&pid);
    if (skpp == 0) {
        return 0;       // missed entry
    }

    connectsock.delete(&pid);

    if (ret != 0) {
        // failed to send SYN packet, may not have populated
        // socket __sk_common.{skc_rcv_saddr, ...}
        return 0;
    }

    // pull in details
    struct sock *skp = *skpp;
    struct ipv6_tuple_t t = { };
    if (!read_ipv6_tuple(&t, skp)) {
        return 0;
    }

    struct pid_comm_t p = { };
    p.pid = pid;
    bpf_get_current_comm(&p.comm, sizeof(p.comm));

    tuplepid_ipv6.update(&t, &p);

    return 0;
}

// Probe for tcp_set_state: when a socket recorded by the connect probes
// reaches TCP_ESTABLISHED, emit the connect event with the stored PID/comm.
// On TCP_CLOSE the stored tuple is simply forgotten.
int trace_tcp_set_state_entry(struct pt_regs *ctx, struct sock *skp, int state)
{
    if (state != TCP_ESTABLISHED && state != TCP_CLOSE) {
        return 0;
    }

    u8 ipver = 0;
    if (check_family(skp, AF_INET)) {
        ipver = 4;
        struct ipv4_tuple_t t = { };
        if (!read_ipv4_tuple(&t, skp)) {
            return 0;
        }

        if (state == TCP_CLOSE) {
            tuplepid_ipv4.delete(&t);
            return 0;
        }

        struct pid_comm_t *p;
        p = tuplepid_ipv4.lookup(&t);
        if (p == 0) {
            return 0;       // missed entry
        }

        struct tcp_ipv4_event_t evt4 = { };
        evt4.ts_ns = bpf_ktime_get_ns();
        evt4.type = TCP_EVENT_TYPE_CONNECT;
        evt4.pid = p->pid >> 32;
        evt4.ip = ipver;
        evt4.saddr = t.saddr;
        evt4.daddr = t.daddr;
        evt4.sport = ntohs(t.sport);
        evt4.dport = ntohs(t.dport);
        evt4.netns = t.netns;

        int i;
        for (i = 0; i < TASK_COMM_LEN; i++) {
            evt4.comm[i] = p->comm[i];
        }

        tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4));
        tuplepid_ipv4.delete(&t);
    } else if (check_family(skp, AF_INET6)) {
        ipver = 6;
        struct ipv6_tuple_t t = { };
        if (!read_ipv6_tuple(&t, skp)) {
            return 0;
        }

        if (state == TCP_CLOSE) {
            tuplepid_ipv6.delete(&t);
            return 0;
        }

        struct pid_comm_t *p;
        p = tuplepid_ipv6.lookup(&t);
        if (p == 0) {
            return 0;       // missed entry
        }

        struct tcp_ipv6_event_t evt6 = { };
        evt6.ts_ns = bpf_ktime_get_ns();
        evt6.type = TCP_EVENT_TYPE_CONNECT;
        evt6.pid = p->pid >> 32;
        evt6.ip = ipver;
        evt6.saddr = t.saddr;
        evt6.daddr = t.daddr;
        evt6.sport = ntohs(t.sport);
        evt6.dport = ntohs(t.dport);
        evt6.netns = t.netns;

        int i;
        for (i = 0; i < TASK_COMM_LEN; i++) {
            evt6.comm[i] = p->comm[i];
        }

        tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6));
        tuplepid_ipv6.delete(&t);
    }
    // else drop

    return 0;
}

// Probe for tcp_close: emit a close event attributed to the current task.
int trace_close_entry(struct pt_regs *ctx, struct sock *skp)
{
    u64 pid = bpf_get_current_pid_tgid();

    ##FILTER_PID##

    u8 oldstate = skp->sk_state;
    // Don't generate close events for connections that were never
    // established in the first place.
    if (oldstate == TCP_SYN_SENT ||
        oldstate == TCP_SYN_RECV ||
        oldstate == TCP_NEW_SYN_RECV)
        return 0;

    u8 ipver = 0;
    if (check_family(skp, AF_INET)) {
        ipver = 4;
        struct ipv4_tuple_t t = { };
        if (!read_ipv4_tuple(&t, skp)) {
            return 0;
        }

        struct tcp_ipv4_event_t evt4 = { };
        evt4.ts_ns = bpf_ktime_get_ns();
        evt4.type = TCP_EVENT_TYPE_CLOSE;
        evt4.pid = pid >> 32;
        evt4.ip = ipver;
        evt4.saddr = t.saddr;
        evt4.daddr = t.daddr;
        evt4.sport = ntohs(t.sport);
        evt4.dport = ntohs(t.dport);
        evt4.netns = t.netns;
        bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm));

        tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4));
    } else if (check_family(skp, AF_INET6)) {
        ipver = 6;
        struct ipv6_tuple_t t = { };
        if (!read_ipv6_tuple(&t, skp)) {
            return 0;
        }

        struct tcp_ipv6_event_t evt6 = { };
        evt6.ts_ns = bpf_ktime_get_ns();
        evt6.type = TCP_EVENT_TYPE_CLOSE;
        evt6.pid = pid >> 32;
        evt6.ip = ipver;
        evt6.saddr = t.saddr;
        evt6.daddr = t.daddr;
        evt6.sport = ntohs(t.sport);
        evt6.dport = ntohs(t.dport);
        evt6.netns = t.netns;
        bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm));

        tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6));
    }
    // else drop

    return 0;
}

// Return probe for inet_csk_accept: emit an accept event for the socket the
// kernel function returned, attributed to the current task.
int trace_accept_return(struct pt_regs *ctx)
{
    struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
    u64 pid = bpf_get_current_pid_tgid();

    ##FILTER_PID##

    if (newsk == NULL) {
        return 0;
    }

    // pull in details
    u16 lport = 0, dport = 0;
    u32 net_ns_inum = 0;
    u8 ipver = 0;

    bpf_probe_read(&dport, sizeof(dport), &newsk->__sk_common.skc_dport);
    bpf_probe_read(&lport, sizeof(lport), &newsk->__sk_common.skc_num);

    // Get network namespace id, if kernel supports it
#ifdef CONFIG_NET_NS
    possible_net_t skc_net;
    bpf_probe_read(&skc_net, sizeof(skc_net), &newsk->__sk_common.skc_net);
    bpf_probe_read(&net_ns_inum, sizeof(net_ns_inum), &skc_net.net->ns.inum);
#else
    net_ns_inum = 0;
#endif

    ##FILTER_NETNS##

    if (check_family(newsk, AF_INET)) {
        ipver = 4;

        struct tcp_ipv4_event_t evt4 = { 0 };

        evt4.ts_ns = bpf_ktime_get_ns();
        evt4.type = TCP_EVENT_TYPE_ACCEPT;
        evt4.netns = net_ns_inum;
        evt4.pid = pid >> 32;
        evt4.ip = ipver;

        bpf_probe_read(&evt4.saddr, sizeof(evt4.saddr),
                       &newsk->__sk_common.skc_rcv_saddr);
        bpf_probe_read(&evt4.daddr, sizeof(evt4.daddr),
                       &newsk->__sk_common.skc_daddr);

        evt4.sport = lport;
        evt4.dport = ntohs(dport);
        bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm));

        // do not send event if IP address is 0.0.0.0 or port is 0
        if (evt4.saddr != 0 && evt4.daddr != 0 &&
            evt4.sport != 0 && evt4.dport != 0) {
            tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4));
        }
    } else if (check_family(newsk, AF_INET6)) {
        ipver = 6;

        struct tcp_ipv6_event_t evt6 = { 0 };

        evt6.ts_ns = bpf_ktime_get_ns();
        evt6.type = TCP_EVENT_TYPE_ACCEPT;
        evt6.netns = net_ns_inum;
        evt6.pid = pid >> 32;
        evt6.ip = ipver;

        bpf_probe_read(&evt6.saddr, sizeof(evt6.saddr),
                       newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&evt6.daddr, sizeof(evt6.daddr),
                       newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);

        evt6.sport = lport;
        evt6.dport = ntohs(dport);
        bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm));

        // do not send event if IP address is 0.0.0.0 or port is 0
        if (evt6.saddr != 0 && evt6.daddr != 0 &&
            evt6.sport != 0 && evt6.dport != 0) {
            tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6));
        }
    }
    // else drop

    return 0;
}
"""
| |
# Size of the comm[] buffers in the event records; mirrors the kernel's
# TASK_COMM_LEN from linux/sched.h.
TASK_COMM_LEN = 16


class TCPIPV4Evt(ctypes.Structure):
    """ctypes view of the IPv4 event record read from the perf buffer.

    The fields are listed in the same order, and with the same widths, as
    ``struct tcp_ipv4_event_t`` in the embedded C program.
    """

    _fields_ = [
        ("ts_ns", ctypes.c_ulonglong),            # u64 ts_ns
        ("type", ctypes.c_uint),                  # u32 type
        ("pid", ctypes.c_uint),                   # u32 pid
        ("comm", ctypes.c_char * TASK_COMM_LEN),  # char comm[TASK_COMM_LEN]
        ("ip", ctypes.c_ubyte),                   # u8 ip
        ("saddr", ctypes.c_uint),                 # u32 saddr
        ("daddr", ctypes.c_uint),                 # u32 daddr
        ("sport", ctypes.c_ushort),               # u16 sport
        ("dport", ctypes.c_ushort),               # u16 dport
        ("netns", ctypes.c_uint)                  # u32 netns
    ]
| |
| |
class TCPIPV6Evt(ctypes.Structure):
    """ctypes view of the IPv6 event record read from the perf buffer.

    The fields follow ``struct tcp_ipv6_event_t`` in the embedded C program.
    Each 128-bit address is exposed as an array of two 64-bit words; the
    array object is what gets handed to ``inet_ntop(AF_INET6, ...)``.
    """

    _fields_ = [
        ("ts_ns", ctypes.c_ulonglong),
        ("type", ctypes.c_uint),
        ("pid", ctypes.c_uint),
        ("comm", ctypes.c_char * TASK_COMM_LEN),
        ("ip", ctypes.c_ubyte),
        # c_ulonglong rather than c_ulong: c_ulong follows the platform's C
        # "long", which is only 32 bits on ILP32 ABIs and would make these
        # fields 8 bytes instead of 16.  c_ulonglong is the type this
        # structure already uses for the 64-bit ts_ns field.
        # NOTE(review): the offsets assume the BPF compiler aligns
        # unsigned __int128 to 8 bytes -- confirm for the LLVM in use.
        ("saddr", (ctypes.c_ulonglong * 2)),
        ("daddr", (ctypes.c_ulonglong * 2)),
        ("sport", ctypes.c_ushort),
        ("dport", ctypes.c_ushort),
        ("netns", ctypes.c_uint)
    ]
| |
| |
# One-letter event code -> word printed in the TYPE column in verbose mode.
verbose_types = dict(
    C="connect",
    A="accept",
    X="close",
    U="unknown")
| |
| |
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one IPv4 event as a row of the table.

    Args:
        cpu: first argument passed by the perf buffer; not used here.
        data: address of the raw record, interpreted as a ``TCPIPV4Evt``.
        size: third argument passed by the perf buffer; not used here.

    Reads the module-level ``args`` for the output format and sets the
    module-level ``start_ts`` the first time a timestamp is printed.
    """
    global start_ts
    event = ctypes.cast(data, ctypes.POINTER(TCPIPV4Evt)).contents

    if args.timestamp:
        # Times are printed relative to the first event that was handled.
        if start_ts == 0:
            start_ts = event.ts_ns
        if args.verbose:
            print("%-14d" % (event.ts_ns - start_ts), end="")
        else:
            print("%-9.3f" % ((float(event.ts_ns) - start_ts) / 1000000000), end="")

    # Numeric event type -> one-letter code; anything else is "U" (unknown).
    type_str = {1: "C", 2: "A", 3: "X"}.get(event.type, "U")

    if args.verbose:
        print("%-12s " % (verbose_types[type_str]), end="")
    else:
        print("%-2s " % (type_str), end="")

    # comm is a fixed-size byte buffer, so it may hold bytes that are not
    # valid UTF-8 (for example a multi-byte character cut off at the end).
    # Decode leniently so one odd name cannot raise inside the callback.
    comm = event.comm.decode('utf-8', 'replace')

    print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" %
          (event.pid, comm,
           event.ip,
           inet_ntop(AF_INET, pack("I", event.saddr)),
           inet_ntop(AF_INET, pack("I", event.daddr)),
           event.sport,
           event.dport), end="")
    # The namespace column is only shown when it is not already fixed by -N.
    if args.verbose and not args.netns:
        print(" %-8d" % event.netns)
    else:
        print()
| |
| |
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one IPv6 event as a row of the table.

    Args:
        cpu: first argument passed by the perf buffer; not used here.
        data: address of the raw record, interpreted as a ``TCPIPV6Evt``.
        size: third argument passed by the perf buffer; not used here.

    Reads the module-level ``args`` for the output format and sets the
    module-level ``start_ts`` the first time a timestamp is printed.
    """
    global start_ts
    event = ctypes.cast(data, ctypes.POINTER(TCPIPV6Evt)).contents

    if args.timestamp:
        # Times are printed relative to the first event that was handled.
        if start_ts == 0:
            start_ts = event.ts_ns
        if args.verbose:
            print("%-14d" % (event.ts_ns - start_ts), end="")
        else:
            print("%-9.3f" % ((float(event.ts_ns) - start_ts) / 1000000000), end="")

    # Numeric event type -> one-letter code; anything else is "U" (unknown).
    type_str = {1: "C", 2: "A", 3: "X"}.get(event.type, "U")

    if args.verbose:
        print("%-12s " % (verbose_types[type_str]), end="")
    else:
        print("%-2s " % (type_str), end="")

    # comm is a fixed-size byte buffer, so it may hold bytes that are not
    # valid UTF-8 (for example a multi-byte character cut off at the end).
    # Decode leniently so one odd name cannot raise inside the callback.
    comm = event.comm.decode('utf-8', 'replace')

    print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" %
          (event.pid, comm,
           event.ip,
           "["+inet_ntop(AF_INET6, event.saddr)+"]",
           "["+inet_ntop(AF_INET6, event.daddr)+"]",
           event.sport,
           event.dport), end="")
    # The namespace column is only shown when it is not already fixed by -N.
    if args.verbose and not args.netns:
        print(" %-8d" % event.netns)
    else:
        print()
| |
| |
# Turn the optional command-line filters into C guard clauses and splice them
# into the program text.  A value of 0 (the default) yields an empty string,
# so the corresponding placeholder simply disappears.
pid_filter = (
    'if (pid >> 32 != %d) { return 0; }' % args.pid if args.pid else "")
netns_filter = (
    'if (net_ns_inum != %d) { return 0; }' % args.netns if args.netns else "")

bpf_text = (bpf_text
            .replace('##FILTER_PID##', pid_filter)
            .replace('##FILTER_NETNS##', netns_filter))
| |
# Compile the program, then hook each handler onto its kernel function.  The
# table keeps the original attachment order.
b = BPF(text=bpf_text)
for attach, kernel_fn, handler in (
        (b.attach_kprobe, "tcp_v4_connect", "trace_connect_v4_entry"),
        (b.attach_kretprobe, "tcp_v4_connect", "trace_connect_v4_return"),
        (b.attach_kprobe, "tcp_v6_connect", "trace_connect_v6_entry"),
        (b.attach_kretprobe, "tcp_v6_connect", "trace_connect_v6_return"),
        (b.attach_kprobe, "tcp_set_state", "trace_tcp_set_state_entry"),
        (b.attach_kprobe, "tcp_close", "trace_close_entry"),
        (b.attach_kretprobe, "inet_csk_accept", "trace_accept_return")):
    attach(event=kernel_fn, fn_name=handler)
| |
print("Tracing TCP established connections. Ctrl-C to end.")

# Column header.  Verbose mode uses a wider TYPE column and, unless a
# namespace filter was given, an extra NETNS column.
header_parts = []
if args.verbose:
    if args.timestamp:
        header_parts.append("%-14s" % ("TIME(ns)"))
    header_parts.append(
        "%-12s %-6s %-16s %-2s %-16s %-16s %-6s %-7s" %
        ("TYPE", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT"))
    if not args.netns:
        header_parts.append("%-8s" % "NETNS")
else:
    if args.timestamp:
        header_parts.append("%-9s" % ("TIME(s)"))
    header_parts.append(
        "%-2s %-6s %-16s %-2s %-16s %-16s %-6s %-6s" %
        ("T", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT"))
print("".join(header_parts))

# Timestamp (ns) of the first printed event; stays 0 until the print
# callbacks have seen one.
start_ts = 0
| |
def inet_ntoa(addr):
    """Format a 32-bit integer as a dotted-quad IPv4 string.

    The least-significant byte of ``addr`` becomes the first octet and the
    most-significant of the low four bytes becomes the last one.

    Args:
        addr: integer holding the address; only the low 32 bits are used.

    Returns:
        The address as a string such as ``"127.0.0.1"``.
    """
    return '.'.join(str((addr >> shift) & 0xff) for shift in (0, 8, 16, 24))
| |
| |
# Route each perf buffer to its printer, then poll until interrupted.
b["tcp_ipv4_event"].open_perf_buffer(print_ipv4_event)
b["tcp_ipv6_event"].open_perf_buffer(print_ipv6_event)
while True:
    try:
        # NOTE(review): newer bcc releases call this perf_buffer_poll();
        # confirm against the bcc version in use before renaming.
        b.kprobe_poll()
    except KeyboardInterrupt:
        # The start-up banner tells the user to stop with Ctrl-C, so leave
        # the loop quietly instead of ending with a traceback.
        break