blob: 075033522333fa2447330c1cecd813ef92791124 [file] [log] [blame]
#!/usr/bin/python
#
# bindsnoop       Trace IPv4 and IPv6 bind()s.
#                 For Linux, uses BCC, eBPF. Embedded C.
#
# based on tcpconnect utility from Brendan Gregg's suite.
#
# USAGE: bindsnoop [-h] [-t] [-w] [-p PID] [-P PORT[,PORT ...]] [-E] [-U]
#                  [-u UID] [--count] [--cgroupmap mappath]
#                  [--mntnsmap mappath]
#
# bindsnoop reports socket options set before the bind call
# that would impact this system call behavior:
# SOL_IP     IP_FREEBIND              F....
# SOL_IP     IP_TRANSPARENT           .T...
# SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
# SOL_SOCKET SO_REUSEADDR             ...R.
# SOL_SOCKET SO_REUSEPORT             ....r
#
# SO_BINDTODEVICE interface is reported as "IF" index
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# Copyright (c) 2020-present Facebook.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Feb-2020   Pavel Dubovitsky   Created this.
28
29from __future__ import print_function, absolute_import, unicode_literals
Hengqi Chenf96fed02022-02-20 15:33:37 +080030from bcc import BPF
Alban Crequy32ab8582020-03-22 16:06:44 +010031from bcc.containers import filter_by_containers
Pavel Dubovitsky8dd4b5a2020-02-18 19:49:11 -080032from bcc.utils import printb
33import argparse
34import re
35from os import strerror
36from socket import (
37 inet_ntop, AF_INET, AF_INET6, __all__ as socket_all, __dict__ as socket_dct
38)
39from struct import pack
40from time import sleep
41
# arguments
# Epilog shown by --help: one usage example per option, followed by the
# legend for the OPTS column and the meaning of the IF column.
examples = """examples:
    ./bindsnoop                      # trace all TCP bind()s
    ./bindsnoop -t                   # include timestamps
    ./bindsnoop -w                   # wider columns (fit IPv6)
    ./bindsnoop -p 181               # only trace PID 181
    ./bindsnoop -P 80                # only trace port 80
    ./bindsnoop -P 80,81             # only trace port 80 and 81
    ./bindsnoop -U                   # include UID
    ./bindsnoop -u 1000              # only trace UID 1000
    ./bindsnoop -E                   # report bind errors
    ./bindsnoop --count              # count bind per src ip
    ./bindsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
    ./bindsnoop --mntnsmap mappath   # only trace mount namespaces in the map

it is reporting socket options set before the bind call
impacting system call behavior:
    SOL_IP     IP_FREEBIND              F....
    SOL_IP     IP_TRANSPARENT           .T...
    SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
    SOL_SOCKET SO_REUSEADDR             ...R.
    SOL_SOCKET SO_REUSEPORT             ....r

    SO_BINDTODEVICE interface is reported as "IF" index
"""
# Command-line interface. Options are listed as (flags, settings) rows in the
# order they are registered, then added to the parser in a single loop.
parser = argparse.ArgumentParser(
    description="Trace TCP binds",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

_flag = {"action": "store_true"}  # settings shared by every boolean switch
_cli_options = (
    (("-t", "--timestamp"),
     dict(_flag, help="include timestamp on output")),
    (("-w", "--wide"),
     dict(_flag, help="wide column output (fits IPv6 addresses)")),
    (("-p", "--pid"),
     dict(help="trace this PID only")),
    (("-P", "--port"),
     dict(help="comma-separated list of ports to trace.")),
    (("-E", "--errors"),
     dict(_flag, help="include errors in the output.")),
    (("-U", "--print-uid"),
     dict(_flag, help="include UID on output")),
    (("-u", "--uid"),
     dict(help="trace this UID only")),
    (("--count",),
     dict(_flag, help="count binds per src ip and port")),
    (("--cgroupmap",),
     dict(help="trace cgroups in this BPF map only")),
    (("--mntnsmap",),
     dict(help="trace mount namespaces in this BPF map only")),
    # hidden switches: kept out of --help
    (("--ebpf",),
     dict(_flag, help=argparse.SUPPRESS)),
    (("--debug-source",),
     dict(_flag, help=argparse.SUPPRESS)),
)
for _names, _settings in _cli_options:
    parser.add_argument(*_names, **_settings)

args = parser.parse_args()
96
# define BPF program
#
# C source that is later passed to BPF(text=...). Several bare tokens in it
# are not C but placeholders rewritten with str.replace() in the
# "code substitutions" section further down:
#   FILTER_PID, FILTER_UID  - optional early-return checks in bindsnoop_entry
#   FILTER_ERRORS           - optionally clears ignore_errors (the -E switch)
#   FILTER_PORT             - optional port check in bindsnoop_return
#   IPV4_CODE, IPV6_CODE    - per-family body taken from struct_init
# Flow of the program: the kprobe handler stashes the struct socket pointer
# in the currsock hash keyed by thread id; the kretprobe handlers look it up,
# read the bound port, the socket option bits and the protocol, run the
# per-family code, and delete the hash entry.
# container_should_be_filtered() is not defined in this string; the text
# returned by filter_by_containers(args) is prepended to it later, which is
# presumably where that helper comes from (confirm in bcc.containers).
bpf_text = """
#include <uapi/linux/ptrace.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#include <net/sock.h>
#pragma clang diagnostic pop
#include <net/inet_sock.h>
#include <net/net_namespace.h>
#include <bcc/proto.h>

BPF_HASH(currsock, u32, struct socket *);

// separate data structs for ipv4 and ipv6
struct ipv4_bind_data_t {
    u64 ts_us;
    u32 pid;
    u32 uid;
    u64 ip;
    u32 saddr;
    u32 bound_dev_if;
    int return_code;
    u16 sport;
    u8 socket_options;
    u8 protocol;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_bind_events);

struct ipv6_bind_data_t {
    // int128 would be aligned on 16 bytes boundary, better to go first
    unsigned __int128 saddr;
    u64 ts_us;
    u32 pid;
    u32 uid;
    u64 ip;
    u32 bound_dev_if;
    int return_code;
    u16 sport;
    u8 socket_options;
    u8 protocol;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_bind_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u16 sport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    u16 sport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);

// bind options for event reporting
union bind_options {
    u8 data;
    struct {
        u8 freebind:1;
        u8 transparent:1;
        u8 bind_address_no_port:1;
        u8 reuseaddress:1;
        u8 reuseport:1;
    } fields;
};

// TODO: add reporting for the original bind arguments
int bindsnoop_entry(struct pt_regs *ctx, struct socket *socket)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = pid_tgid;
    FILTER_PID

    u32 uid = bpf_get_current_uid_gid();

    FILTER_UID

    if (container_should_be_filtered()) {
        return 0;
    }

    // stash the sock ptr for lookup on return
    currsock.update(&tid, &socket);

    return 0;
};


static int bindsnoop_return(struct pt_regs *ctx, short ipver)
{
    int ret = PT_REGS_RC(ctx);
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = pid_tgid;

    struct socket **skpp;
    skpp = currsock.lookup(&tid);
    if (skpp == 0) {
        return 0;   // missed entry
    }

    int ignore_errors = 1;
    FILTER_ERRORS
    if (ret != 0 && ignore_errors) {
        // failed to bind
        currsock.delete(&tid);
        return 0;
    }

    // pull in details
    struct socket *skp_ = *skpp;
    struct sock *skp = skp_->sk;

    struct inet_sock *sockp = (struct inet_sock *)skp;

    u16 sport = 0;
    bpf_probe_read_kernel(&sport, sizeof(sport), &sockp->inet_sport);
    sport = ntohs(sport);

    FILTER_PORT

    union bind_options opts = {0};
    u8 bitfield;
    // fetching freebind, transparent, and bind_address_no_port bitfields
    // via the next struct member, rcv_tos
    bitfield = (u8) *(&sockp->rcv_tos - 2) & 0xFF;
    // IP_FREEBIND (sockp->freebind)
    opts.fields.freebind = bitfield >> 2 & 0x01;
    // IP_TRANSPARENT (sockp->transparent)
    opts.fields.transparent = bitfield >> 5 & 0x01;
    // IP_BIND_ADDRESS_NO_PORT (sockp->bind_address_no_port)
    opts.fields.bind_address_no_port = *(&sockp->rcv_tos - 1) & 0x01;

    // SO_REUSEADDR and SO_REUSEPORT are bitfields that
    // cannot be accessed directly, fetched via the next struct member,
    // __sk_common.skc_bound_dev_if
    bitfield = *((u8*)&skp->__sk_common.skc_bound_dev_if - 1);
    // SO_REUSEADDR (skp->reuse)
    // it is 4 bit, but we are interested in the lowest one
    opts.fields.reuseaddress = bitfield & 0x0F;
    // SO_REUSEPORT (skp->reuseport)
    opts.fields.reuseport = bitfield >> 4 & 0x01;

    // workaround for reading the sk_protocol bitfield (from tcpaccept.py):
    u16 protocol;
    int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
    int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);

    // Since kernel v5.6 sk_protocol has its own u16 field
    if (sk_lingertime_offset - gso_max_segs_offset == 2)
        protocol = skp->sk_protocol;
    else if (sk_lingertime_offset - gso_max_segs_offset == 4)
        // 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 3);
    else
        // pre-4.10 with little endian
        protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 3);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // 4.10+ with big endian
        protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 1);
    else
        // pre-4.10 with big endian
        protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 1);
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

    if (ipver == 4) {
        IPV4_CODE
    } else /* 6 */ {
        IPV6_CODE
    }

    currsock.delete(&tid);

    return 0;
}

int bindsnoop_v4_return(struct pt_regs *ctx)
{
    return bindsnoop_return(ctx, 4);
}

int bindsnoop_v6_return(struct pt_regs *ctx)
{
    return bindsnoop_return(ctx, 6);
}
"""
291
# C fragments that stand in for the IPV4_CODE / IPV6_CODE placeholders of the
# BPF program. Each address family offers two variants: 'count' bumps the
# per-flow hash, 'trace' fills an event record and submits it to the perf
# buffer.
_ipv4_fragments = dict(
    count="""
               struct ipv4_flow_key_t flow_key = {};
               flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
               flow_key.sport = sport;
               ipv4_count.increment(flow_key);""",
    trace="""
               struct ipv4_bind_data_t data4 = {.pid = pid, .ip = ipver};
               data4.uid = bpf_get_current_uid_gid();
               data4.ts_us = bpf_ktime_get_ns() / 1000;
               bpf_probe_read_kernel(
                 &data4.saddr, sizeof(data4.saddr), &sockp->inet_saddr);
               data4.return_code = ret;
               data4.sport = sport;
               data4.bound_dev_if = skp->__sk_common.skc_bound_dev_if;
               data4.socket_options = opts.data;
               data4.protocol = protocol;
               bpf_get_current_comm(&data4.task, sizeof(data4.task));
               ipv4_bind_events.perf_submit(ctx, &data4, sizeof(data4));""",
)

_ipv6_fragments = dict(
    count="""
               struct ipv6_flow_key_t flow_key = {};
               bpf_probe_read_kernel(&flow_key.saddr, sizeof(flow_key.saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
               flow_key.sport = sport;
               ipv6_count.increment(flow_key);""",
    trace="""
               struct ipv6_bind_data_t data6 = {.pid = pid, .ip = ipver};
               data6.uid = bpf_get_current_uid_gid();
               data6.ts_us = bpf_ktime_get_ns() / 1000;
               bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
               data6.return_code = ret;
               data6.sport = sport;
               data6.bound_dev_if = skp->__sk_common.skc_bound_dev_if;
               data6.socket_options = opts.data;
               data6.protocol = protocol;
               bpf_get_current_comm(&data6.task, sizeof(data6.task));
               ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));""",
)

struct_init = {
    'ipv4': _ipv4_fragments,
    'ipv6': _ipv6_fragments,
}
335
# code substitutions
# The per-family body depends only on whether we count or trace.
_mode = 'count' if args.count else 'trace'
for _family_tag, _family in (("IPV4_CODE", 'ipv4'), ("IPV6_CODE", 'ipv6')):
    bpf_text = bpf_text.replace(_family_tag, struct_init[_family][_mode])

# Collect the filters requested on the command line, then apply them.
_active_filters = []
if args.pid:
    _active_filters.append(
        ('FILTER_PID', 'if (pid != %s) { return 0; }' % args.pid))
if args.port:
    _port_checks = ' && '.join(
        'sport != %d' % int(_port) for _port in args.port.split(','))
    _active_filters.append(
        ('FILTER_PORT',
         'if (%s) { currsock.delete(&tid); return 0; }' % _port_checks))
if args.uid:
    _active_filters.append(
        ('FILTER_UID', 'if (uid != %s) { return 0; }' % args.uid))
if args.errors:
    _active_filters.append(('FILTER_ERRORS', 'ignore_errors = 0;'))
for _placeholder, _c_code in _active_filters:
    bpf_text = bpf_text.replace(_placeholder, _c_code)

# Container filtering code goes in front of the program text.
bpf_text = filter_by_containers(args) + bpf_text

# Any placeholder that is still present was not requested: blank it out.
for _placeholder in ('FILTER_PID', 'FILTER_PORT', 'FILTER_UID',
                     'FILTER_ERRORS'):
    bpf_text = bpf_text.replace(_placeholder, '')
Pavel Dubovitsky8dd4b5a2020-02-18 19:49:11 -0800362
# selecting output format - 80 characters or wide, fitting IPv6 addresses
def _errno_message(return_code):
    """Return the OS error text for a bind() return code.

    The BPF program stores the probed function's raw return value in
    return_code; by Linux kernel convention a failure is a negative errno.
    strerror() only knows positive errno values and answers
    "Unknown error -N" for negative ones, so the sign is dropped first.
    A return code of 0 still maps to the platform's "success" text.
    """
    return strerror(abs(return_code))


# Narrow layout (default): the error column shows the numeric return code.
header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s"
output_fmt = b"%8d %-12.12s %-4.4s %-15.15s %5d %-5s %2d"
error_header_fmt = "%3s "
error_output_fmt = b"%3s "
error_value_fmt = str
if args.wide:
    # Wide layout: room for a full IPv6 address and a textual error.
    header_fmt = "%10s %-12.12s %-4s %-39s %-5s %5s %2s"
    output_fmt = b"%10d %-12.12s %-4s %-39s %5d %-5s %2d"
    error_header_fmt = "%-25s "
    error_output_fmt = b"%-25s "
    error_value_fmt = _errno_message

# --ebpf: dump the generated C program and stop before loading anything.
if args.ebpf:
    print(bpf_text)
    exit()
379
# L4 protocol resolver
class L4Proto:
    """Map IP protocol numbers to the names exported by the socket module."""

    def __init__(self):
        # Every public socket constant called IPPROTO_<NAME> contributes a
        # number -> <NAME> entry. If two names share a number, the one that
        # comes later in socket.__all__ overwrites the earlier one.
        prefix = "IPPROTO_"
        self.num2str = {
            socket_dct[name]: name[len(prefix):]
            for name in socket_all
            if name.startswith(prefix)
        }

    def proto2str(self, proto):
        """Return the name recorded for proto, or "UNKNOWN" if there is none."""
        try:
            return self.num2str[proto]
        except KeyError:
            return "UNKNOWN"


l4 = L4Proto()
394
# bind options:
# SOL_IP     IP_FREEBIND              F....
# SOL_IP     IP_TRANSPARENT           .T...
# SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
# SOL_SOCKET SO_REUSEADDR             ...R.
# SOL_SOCKET SO_REUSEPORT             ....r
def opts2str(bitfield):
    """Render the low five bits of bitfield as a 5-character bytes string.

    Position i of the result shows the i-th letter of "FTNRr" when bit i is
    set and "." when it is clear; higher bits are ignored.
    """
    letters = "FTNRr"
    rendered = "".join(
        letter if bitfield & (1 << position) else "."
        for position, letter in enumerate(letters)
    )
    return rendered.encode()
408
409
# process events
def _print_bind_event(event, family, packed_saddr):
    """Print one output line for a bind event of either address family.

    Shared by the IPv4 and IPv6 perf-buffer callbacks, which differ only in
    how the source address is decoded.

    event: record returned by the perf buffer table's event() accessor.
    family: AF_INET or AF_INET6, handed to inet_ntop().
    packed_saddr: source address in the packed binary form inet_ntop()
        expects for that family.

    Optional leading columns follow the command line: elapsed time (-t),
    UID (-U) and return code (-E), in that order.
    """
    global start_ts
    if args.timestamp:
        # The first event seen becomes time zero for the TIME(s) column.
        if start_ts == 0:
            start_ts = event.ts_us
        printb(b"%-9.6f " % ((float(event.ts_us) - start_ts) / 1000000), nl="")
    if args.print_uid:
        printb(b"%6d " % event.uid, nl="")
    if args.errors:
        printb(
            error_output_fmt % error_value_fmt(event.return_code).encode(),
            nl="",
        )
    printb(output_fmt % (event.pid, event.task,
                         l4.proto2str(event.protocol).encode(),
                         inet_ntop(family, packed_saddr).encode(),
                         event.sport, opts2str(event.socket_options),
                         event.bound_dev_if))


def print_ipv4_bind_event(cpu, data, size):
    """Perf-buffer callback for ipv4_bind_events; cpu and size are unused."""
    event = b["ipv4_bind_events"].event(data)
    # saddr arrives as a native-order u32; packing it with native "I" gives
    # back the 4 address bytes in the order they were read from the socket.
    _print_bind_event(event, AF_INET, pack("I", event.saddr))


def print_ipv6_bind_event(cpu, data, size):
    """Perf-buffer callback for ipv6_bind_events; cpu and size are unused."""
    event = b["ipv6_bind_events"].event(data)
    # The 128-bit saddr field is passed to inet_ntop() as-is.
    _print_bind_event(event, AF_INET6, event.saddr)
449
450
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print one "address port count" row per flow, busiest flow first.

    counts_tab: mapping-like object whose items() yields (key, counter)
        pairs; key carries saddr and sport, counter carries value.
    l3prot: 'ipv4' decodes saddr as a native-order 32-bit integer; any other
        value passes saddr to inet_ntop() as a packed IPv6 address.
    """
    ranked = sorted(
        counts_tab.items(), key=lambda entry: entry[1].value, reverse=True
    )
    is_v4 = l3prot == 'ipv4'
    for flow, hits in ranked:
        if is_v4:
            address = inet_ntop(AF_INET, pack('I', flow.saddr))
        else:
            address = inet_ntop(AF_INET6, flow.saddr)
        flow_column = "%-32s %20s" % (address, flow.sport)
        print("%s %-10d" % (flow_column, hits.value))
465
466
# initialize BPF
b = BPF(text=bpf_text)
# Both bind entry points share one entry handler ...
for _kernel_fn in ("inet_bind", "inet6_bind"):
    b.attach_kprobe(event=_kernel_fn, fn_name="bindsnoop_entry")
# ... and each gets the return handler for its own address family.
for _kernel_fn, _handler in (("inet_bind", "bindsnoop_v4_return"),
                             ("inet6_bind", "bindsnoop_v6_return")):
    b.attach_kretprobe(event=_kernel_fn, fn_name=_handler)

print("Tracing binds ... Hit Ctrl-C to end")
if args.count:
    # Counting mode: idle until Ctrl-C, then dump both per-flow tables.
    try:
        while True:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-32s %20s %-10s" % ("LADDR", "LPORT", "BINDS"))
    depict_cnt(b["ipv4_count"])
    depict_cnt(b["ipv6_count"], l3prot='ipv6')
else:
    # Tracing mode: print the column header, then stream events.
    # The optional columns come first, in the order the callbacks print them.
    if args.timestamp:
        print("%-9s " % ("TIME(s)"), end="")
    if args.print_uid:
        print("%6s " % ("UID"), end="")
    if args.errors:
        print(error_header_fmt % ("RC"), end="")
    print(header_fmt % ("PID", "COMM", "PROT", "ADDR", "PORT", "OPTS", "IF"))

    # Timestamp of the first event; the callbacks set it on first use.
    start_ts = 0

    # read events
    b["ipv4_bind_events"].open_perf_buffer(print_ipv4_bind_event)
    b["ipv6_bind_events"].open_perf_buffer(print_ipv6_bind_event)
    try:
        while True:
            b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()
507 exit()