#!/usr/bin/python
2#
# bindsnoop       Trace IPv4 and IPv6 bind()s.
4# For Linux, uses BCC, eBPF. Embedded C.
5#
6# based on tcpconnect utility from Brendan Gregg's suite.
7#
# USAGE: bindsnoop [-h] [-t] [-E] [-p PID] [-P PORT[,PORT ...]] [-w]
#                  [-U] [-u UID] [--count] [--cgroupmap mappath]
10#
11# bindsnoop reports socket options set before the bind call
12# that would impact this system call behavior:
13# SOL_IP IP_FREEBIND F....
14# SOL_IP IP_TRANSPARENT .T...
15# SOL_IP IP_BIND_ADDRESS_NO_PORT ..N..
16# SOL_SOCKET SO_REUSEADDR ...R.
17# SOL_SOCKET SO_REUSEPORT ....r
18#
19# SO_BINDTODEVICE interface is reported as "BOUND_IF" index
20#
21# This uses dynamic tracing of kernel functions, and will need to be updated
22# to match kernel changes.
23#
24# Copyright (c) 2020-present Facebook.
25# Licensed under the Apache License, Version 2.0 (the "License")
26#
27# 14-Feb-2020 Pavel Dubovitsky Created this.
28
29from __future__ import print_function, absolute_import, unicode_literals
30from bcc import BPF, DEBUG_SOURCE
31from bcc.utils import printb
32import argparse
33import re
34from os import strerror
35from socket import (
36 inet_ntop, AF_INET, AF_INET6, __all__ as socket_all, __dict__ as socket_dct
37)
38from struct import pack
39from time import sleep
40
# arguments
# Usage examples plus the socket-option legend; passed to argparse as the
# epilog, so it is printed verbatim at the end of --help.
examples = """examples:
    ./bindsnoop                      # trace all TCP bind()s
    ./bindsnoop -t                   # include timestamps
    ./bindsnoop -w                   # wider columns (fit IPv6)
    ./bindsnoop -p 181               # only trace PID 181
    ./bindsnoop -P 80                # only trace port 80
    ./bindsnoop -P 80,81             # only trace port 80 and 81
    ./bindsnoop -U                   # include UID
    ./bindsnoop -u 1000              # only trace UID 1000
    ./bindsnoop -E                   # report bind errors
    ./bindsnoop --count              # count bind per src ip
    ./bindsnoop --cgroupmap mappath  # only trace cgroups in this BPF map

it is reporting socket options set before the bind call
impacting system call behavior:
    SOL_IP     IP_FREEBIND              F....
    SOL_IP     IP_TRANSPARENT           .T...
    SOL_IP     IP_BIND_ADDRESS_NO_PORT  ..N..
    SOL_SOCKET SO_REUSEADDR             ...R.
    SOL_SOCKET SO_REUSEPORT             ....r

    SO_BINDTODEVICE interface is reported as "IF" index
"""
parser = argparse.ArgumentParser(
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Trace TCP binds")

# One row per command-line option: (option strings, add_argument keywords).
# The rows are registered in this order, which is also the --help order.
# --ebpf and --debug-source are hidden from --help via argparse.SUPPRESS.
_option_specs = (
    (("-t", "--timestamp"),
        dict(action="store_true", help="include timestamp on output")),
    (("-w", "--wide"),
        dict(action="store_true",
             help="wide column output (fits IPv6 addresses)")),
    (("-p", "--pid"),
        dict(help="trace this PID only")),
    (("-P", "--port"),
        dict(help="comma-separated list of ports to trace.")),
    (("-E", "--errors"),
        dict(action="store_true", help="include errors in the output.")),
    (("-U", "--print-uid"),
        dict(action="store_true", help="include UID on output")),
    (("-u", "--uid"),
        dict(help="trace this UID only")),
    (("--count",),
        dict(action="store_true", help="count binds per src ip and port")),
    (("--cgroupmap",),
        dict(help="trace cgroups in this BPF map only")),
    (("--ebpf",),
        dict(action="store_true", help=argparse.SUPPRESS)),
    (("--debug-source",),
        dict(action="store_true", help=argparse.SUPPRESS)),
)
for _option_strings, _option_kwargs in _option_specs:
    parser.add_argument(*_option_strings, **_option_kwargs)

args = parser.parse_args()
92
# define BPF program
# Embedded C for the kprobe/kretprobe handlers that are attached further down.
# The bare upper-case tokens (CGROUP_MAP, FILTER_PID, FILTER_UID,
# FILTER_CGROUP, FILTER_ERRORS, FILTER_PORT, IPV4_CODE, IPV6_CODE) are not C:
# they are placeholders that the "code substitutions" section replaces with
# generated code, or with an empty string, before the text is given to BPF().
#
# Flow: bindsnoop_entry stashes the `struct socket *` argument in the currsock
# map keyed by thread id; bindsnoop_return (shared by the v4/v6 wrappers)
# looks it up again, reads the bound port, the socket option bits and the
# protocol, and then runs whichever IPV4_CODE / IPV6_CODE body was spliced in.
bpf_text = """
#include <uapi/linux/ptrace.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#include <net/sock.h>
#pragma clang diagnostic pop
#include <net/inet_sock.h>
#include <net/net_namespace.h>
#include <bcc/proto.h>

BPF_HASH(currsock, u32, struct socket *);

// separate data structs for ipv4 and ipv6
struct ipv4_bind_data_t {
    u64 ts_us;
    u32 pid;
    u32 uid;
    u64 ip;
    u32 saddr;
    u32 bound_dev_if;
    int return_code;
    u16 sport;
    u8 socket_options;
    u8 protocol;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_bind_events);

struct ipv6_bind_data_t {
    // int128 would be aligned on 16 bytes boundary, better to go first
    unsigned __int128 saddr;
    u64 ts_us;
    u32 pid;
    u32 uid;
    u64 ip;
    u32 bound_dev_if;
    int return_code;
    u16 sport;
    u8 socket_options;
    u8 protocol;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_bind_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u16 sport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    u16 sport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);

CGROUP_MAP

// bind options for event reporting
union bind_options {
    u8 data;
    struct {
        u8 freebind:1;
        u8 transparent:1;
        u8 bind_address_no_port:1;
        u8 reuseaddress:1;
        u8 reuseport:1;
    } fields;
};

// TODO: add reporting for the original bind arguments
int bindsnoop_entry(struct pt_regs *ctx, struct socket *socket)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = pid_tgid;
    FILTER_PID

    u32 uid = bpf_get_current_uid_gid();

    FILTER_UID

    FILTER_CGROUP

    // stash the sock ptr for lookup on return
    currsock.update(&tid, &socket);

    return 0;
};


static int bindsnoop_return(struct pt_regs *ctx, short ipver)
{
    int ret = PT_REGS_RC(ctx);
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = pid_tgid;

    struct socket **skpp;
    skpp = currsock.lookup(&tid);
    if (skpp == 0) {
        return 0;   // missed entry
    }

    int ignore_errors = 1;
    FILTER_ERRORS
    if (ret != 0 && ignore_errors) {
        // failed to bind
        currsock.delete(&tid);
        return 0;
    }

    // pull in details
    struct socket *skp_ = *skpp;
    struct sock *skp = skp_->sk;

    struct inet_sock *sockp = (struct inet_sock *)skp;

    u16 sport = 0;
    bpf_probe_read(&sport, sizeof(sport), &sockp->inet_sport);
    sport = ntohs(sport);

    FILTER_PORT

    union bind_options opts = {0};
    u8 bitfield;
    // fetching freebind, transparent, and bind_address_no_port bitfields
    // via the next struct member, rcv_tos
    bitfield = (u8) *(&sockp->rcv_tos - 2) & 0xFF;
    // IP_FREEBIND (sockp->freebind)
    opts.fields.freebind = bitfield >> 2 & 0x01;
    // IP_TRANSPARENT (sockp->transparent)
    opts.fields.transparent = bitfield >> 5 & 0x01;
    // IP_BIND_ADDRESS_NO_PORT (sockp->bind_address_no_port)
    opts.fields.bind_address_no_port = *(&sockp->rcv_tos - 1) & 0x01;

    // SO_REUSEADDR and SO_REUSEPORT are bitfields that
    // cannot be accessed directly, fetched via the next struct member,
    // __sk_common.skc_bound_dev_if
    bitfield = *((u8*)&skp->__sk_common.skc_bound_dev_if - 1);
    // SO_REUSEADDR (skp->reuse)
    // it is 4 bit, but we are interested in the lowest one
    opts.fields.reuseaddress = bitfield & 0x0F;
    // SO_REUSEPORT (skp->reuseport)
    opts.fields.reuseport = bitfield >> 4 & 0x01;

    // workaround for reading the sk_protocol bitfield (from tcpaccept.py):
    u8 protocol;
    int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
    int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
    if (sk_lingertime_offset - gso_max_segs_offset == 4)
        // 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 3);
    else
        // pre-4.10 with little endian
        protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 3);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // 4.10+ with big endian
        protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 1);
    else
        // pre-4.10 with big endian
        protocol = *(u8 *)((u64)&skp->sk_wmem_queued - 1);
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

    if (ipver == 4) {
        IPV4_CODE
    } else /* 6 */ {
        IPV6_CODE
    }

    currsock.delete(&tid);

    return 0;
}

int bindsnoop_v4_return(struct pt_regs *ctx)
{
    return bindsnoop_return(ctx, 4);
}

int bindsnoop_v6_return(struct pt_regs *ctx)
{
    return bindsnoop_return(ctx, 6);
}
"""
283
# C snippets that the "code substitutions" section splices into bpf_text:
#   struct_init[family]['count']  replaces IPV4_CODE / IPV6_CODE under --count
#                                 and bumps the per-(saddr, sport) counter map
#   struct_init[family]['trace']  replaces IPV4_CODE / IPV6_CODE otherwise and
#                                 submits one bind_data_t record per bind
#   struct_init['filter_cgroup']  replaces FILTER_CGROUP under --cgroupmap and
#                                 returns early unless the current cgroup id
#                                 is found in the cgroupset map
# The count/trace snippets are pasted into bindsnoop_return and therefore use
# its locals (skp, sockp, sport, opts, protocol, ret, pid, ipver, ctx).
struct_init = {
    'ipv4': {
        'count': """
            struct ipv4_flow_key_t flow_key = {};
            flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
            flow_key.sport = sport;
            ipv4_count.increment(flow_key);""",
        'trace': """
            struct ipv4_bind_data_t data4 = {.pid = pid, .ip = ipver};
            data4.uid = bpf_get_current_uid_gid();
            data4.ts_us = bpf_ktime_get_ns() / 1000;
            bpf_probe_read(
                &data4.saddr, sizeof(data4.saddr), &sockp->inet_saddr);
            data4.return_code = ret;
            data4.sport = sport;
            data4.bound_dev_if = skp->__sk_common.skc_bound_dev_if;
            data4.socket_options = opts.data;
            data4.protocol = protocol;
            bpf_get_current_comm(&data4.task, sizeof(data4.task));
            ipv4_bind_events.perf_submit(ctx, &data4, sizeof(data4));"""
    },
    'ipv6': {
        'count': """
            struct ipv6_flow_key_t flow_key = {};
            bpf_probe_read(&flow_key.saddr, sizeof(flow_key.saddr),
                skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
            flow_key.sport = sport;
            ipv6_count.increment(flow_key);""",
        'trace': """
            struct ipv6_bind_data_t data6 = {.pid = pid, .ip = ipver};
            data6.uid = bpf_get_current_uid_gid();
            data6.ts_us = bpf_ktime_get_ns() / 1000;
            bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
                skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
            data6.return_code = ret;
            data6.sport = sport;
            data6.bound_dev_if = skp->__sk_common.skc_bound_dev_if;
            data6.socket_options = opts.data;
            data6.protocol = protocol;
            bpf_get_current_comm(&data6.task, sizeof(data6.task));
            ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));"""
    },
    'filter_cgroup': """
        u64 cgroupid = bpf_get_current_cgroup_id();
        if (cgroupset.lookup(&cgroupid) == NULL) {
            return 0;
        }""",
}
332
# code substitutions
# Each placeholder in bpf_text is replaced with generated C when the matching
# command-line option was given; whatever is still left is blanked out at the
# end so that no placeholder token reaches the compiler.

# per-event body: aggregate into the count maps, or emit a perf event
if args.count:
    bpf_text = bpf_text.replace("IPV4_CODE", struct_init['ipv4']['count'])
    bpf_text = bpf_text.replace("IPV6_CODE", struct_init['ipv6']['count'])
else:
    bpf_text = bpf_text.replace("IPV4_CODE", struct_init['ipv4']['trace'])
    bpf_text = bpf_text.replace("IPV6_CODE", struct_init['ipv6']['trace'])

if args.pid:
    bpf_text = bpf_text.replace('FILTER_PID',
        'if (pid != %s) { return 0; }' % args.pid)
if args.port:
    # int() rejects non-numeric ports before they reach the C source
    sports = [int(sport) for sport in args.port.split(',')]
    sports_if = ' && '.join(['sport != %d' % sport for sport in sports])
    # currsock is keyed by thread id (see bindsnoop_entry), so the stashed
    # socket must be dropped with &tid. Deleting by &pid would leave the entry
    # behind for every thread other than the main one and could remove a
    # different thread's entry instead.
    bpf_text = bpf_text.replace('FILTER_PORT',
        'if (%s) { currsock.delete(&tid); return 0; }' % sports_if)
if args.uid:
    bpf_text = bpf_text.replace('FILTER_UID',
        'if (uid != %s) { return 0; }' % args.uid)
if args.errors:
    # keep failed binds instead of dropping them in bindsnoop_return
    bpf_text = bpf_text.replace('FILTER_ERRORS', 'ignore_errors = 0;')
if args.cgroupmap:
    bpf_text = bpf_text.replace('FILTER_CGROUP', struct_init['filter_cgroup'])
    bpf_text = bpf_text.replace(
        'CGROUP_MAP',
        (
            'BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "%s");' %
            args.cgroupmap
        )
    )

# blank out every placeholder whose option was not requested
bpf_text = bpf_text.replace('FILTER_PID', '')
bpf_text = bpf_text.replace('FILTER_PORT', '')
bpf_text = bpf_text.replace('FILTER_UID', '')
bpf_text = bpf_text.replace('FILTER_ERRORS', '')
bpf_text = bpf_text.replace('FILTER_CGROUP', '')
bpf_text = bpf_text.replace('CGROUP_MAP', '')
370
# selecting output format - 80 characters or wide, fitting IPv6 addresses
# header_fmt / output_fmt lay out the PID..IF columns. The error_* trio is
# only used with -E and controls how the bind() return code column is shown.
header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s"
output_fmt = b"%8d %-12.12s %-4.4s %-15.15s %5d %-5s %2d"
error_header_fmt = "%3s "
error_output_fmt = b"%3s "
error_value_fmt = str  # narrow layout: print the raw numeric return code
if args.wide:
    header_fmt = "%10s %-12.12s %-4s %-39s %-5s %5s %2s"
    output_fmt = b"%10d %-12.12s %-4s %-39s %5d %-5s %2d"
    error_header_fmt = "%-25s "
    error_output_fmt = b"%-25s "

    def error_value_fmt(return_code):
        """Return the textual description of a bind() return code."""
        # Kernel functions signal failure with a negative errno value, while
        # strerror() expects the positive errno. Passing the raw value
        # rendered every failure as "Unknown error -N".
        return strerror(abs(return_code))

# --ebpf (hidden option): dump the generated C program and stop
if args.ebpf:
    print(bpf_text)
    exit()
387
# L4 protocol resolver
class L4Proto:
    """Translate IP protocol numbers into names.

    The table is built once from the socket module: every exported name that
    matches IPPROTO_<NAME> contributes an entry {value: "<NAME>"}. When two
    constants share a value, the one listed later in socket.__all__ wins.
    """

    def __init__(self):
        name_re = re.compile("IPPROTO_(.*)")
        self.num2str = {}
        for exported in socket_all:
            hit = name_re.match(exported)
            if hit is None:
                continue
            self.num2str[socket_dct[exported]] = hit.group(1)

    def proto2str(self, proto):
        """Return the name for protocol number *proto*, or "UNKNOWN"."""
        try:
            return self.num2str[proto]
        except KeyError:
            return "UNKNOWN"


l4 = L4Proto()
402
403# bind options:
404# SOL_IP IP_FREEBIND F....
405# SOL_IP IP_TRANSPARENT .T...
406# SOL_IP IP_BIND_ADDRESS_NO_PORT ..N..
407# SOL_SOCKET SO_REUSEADDR ...R.
408# SOL_SOCKET SO_REUSEPORT ....r
def opts2str(bitfield):
    """Render the socket-option bitmask as a five-character flag string.

    Bits 0..4 map to the letters F, T, N, R, r in that order; a set bit shows
    its letter, a clear bit shows ".". Higher bits are ignored. The result is
    returned as bytes.
    """
    flags = [
        letter if bitfield & (1 << position) else "."
        for position, letter in enumerate("FTNRr")
    ]
    return "".join(flags).encode()
416
417
418# process events
def print_ipv4_bind_event(cpu, data, size):
    """Perf-buffer callback: decode one IPv4 bind event and print it as a row.

    Optional leading columns (relative timestamp, UID, return code) are
    printed first, each only when its command-line switch is set, followed by
    the fixed PID/COMM/PROT/ADDR/PORT/OPTS/IF columns. `cpu` and `size` are
    part of the callback signature and are not used.
    """
    global start_ts
    event = b["ipv4_bind_events"].event(data)

    if args.timestamp:
        # the first event seen becomes time zero
        if start_ts == 0:
            start_ts = event.ts_us
        elapsed_s = (float(event.ts_us) - start_ts) / 1000000
        printb(b"%-9.6f " % elapsed_s, nl="")
    if args.print_uid:
        printb(b"%6d " % event.uid, nl="")
    if args.errors:
        rc_text = error_value_fmt(event.return_code).encode()
        printb(error_output_fmt % rc_text, nl="")

    proto_name = l4.proto2str(event.protocol).encode()
    # saddr is a 32-bit integer; repack it into the 4 bytes inet_ntop expects
    local_addr = inet_ntop(AF_INET, pack("I", event.saddr)).encode()
    option_flags = opts2str(event.socket_options)
    printb(output_fmt % (
        event.pid, event.task, proto_name, local_addr,
        event.sport, option_flags, event.bound_dev_if))
437
438
def print_ipv6_bind_event(cpu, data, size):
    """Perf-buffer callback: decode one IPv6 bind event and print it as a row.

    Optional leading columns (relative timestamp, UID, return code) are
    printed first, each only when its command-line switch is set, followed by
    the fixed PID/COMM/PROT/ADDR/PORT/OPTS/IF columns. `cpu` and `size` are
    part of the callback signature and are not used.
    """
    global start_ts
    event = b["ipv6_bind_events"].event(data)

    if args.timestamp:
        # the first event seen becomes time zero
        if start_ts == 0:
            start_ts = event.ts_us
        elapsed_s = (float(event.ts_us) - start_ts) / 1000000
        printb(b"%-9.6f " % elapsed_s, nl="")
    if args.print_uid:
        printb(b"%6d " % event.uid, nl="")
    if args.errors:
        rc_text = error_value_fmt(event.return_code).encode()
        printb(error_output_fmt % rc_text, nl="")

    proto_name = l4.proto2str(event.protocol).encode()
    # the 128-bit saddr field is handed to inet_ntop as-is
    local_addr = inet_ntop(AF_INET6, event.saddr).encode()
    option_flags = opts2str(event.socket_options)
    printb(output_fmt % (
        event.pid, event.task, proto_name, local_addr,
        event.sport, option_flags, event.bound_dev_if))
457
458
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print one line per (address, port) counter, highest count first.

    counts_tab must provide items() yielding (key, counter) pairs where the
    key has `saddr` and `sport` attributes and the counter has `value`.
    l3prot selects how saddr is decoded: 'ipv4' packs it as a 32-bit integer,
    any other value passes it to inet_ntop as an IPv6 address.
    """
    ranked = sorted(
        counts_tab.items(), key=lambda entry: entry[1].value, reverse=True)
    for flow, counter in ranked:
        if l3prot == 'ipv4':
            address = inet_ntop(AF_INET, pack('I', flow.saddr))
        else:
            address = inet_ntop(AF_INET6, flow.saddr)
        row_key = "%-32s %20s" % (address, flow.sport)
        print("%s %-10d" % (row_key, counter.value))
473
474
# initialize BPF
# The hidden --debug-source option used to be parsed and then ignored, even
# though DEBUG_SOURCE is imported from bcc. Honour it by passing the flag to
# the BPF loader (see the BCC reference guide for what the flag prints);
# without the option, debug stays at 0 as before.
b = BPF(text=bpf_text, debug=DEBUG_SOURCE if args.debug_source else 0)
# The entry probe is shared by both address families; the return probes
# differ only in the IP version they pass to bindsnoop_return.
b.attach_kprobe(event="inet_bind", fn_name="bindsnoop_entry")
b.attach_kprobe(event="inet6_bind", fn_name="bindsnoop_entry")
b.attach_kretprobe(event="inet_bind", fn_name="bindsnoop_v4_return")
b.attach_kretprobe(event="inet6_bind", fn_name="bindsnoop_v6_return")
481
print("Tracing binds ... Hit Ctrl-C to end")
if args.count:
    # Counting mode: nothing is printed while tracing. Idle until Ctrl-C,
    # then dump both counter maps.
    try:
        while True:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-32s %20s %-10s" % ("LADDR", "LPORT", "BINDS"))
    depict_cnt(b["ipv4_count"])
    depict_cnt(b["ipv6_count"], l3prot='ipv6')
# read events
else:
    # header: optional columns first, in the same order the event printers
    # emit them, then the fixed columns
    header_line = ""
    if args.timestamp:
        header_line += "%-9s " % ("TIME(s)")
    if args.print_uid:
        header_line += "%6s " % ("UID")
    if args.errors:
        header_line += error_header_fmt % ("RC")
    header_line += header_fmt % (
        "PID", "COMM", "PROT", "ADDR", "PORT", "OPTS", "IF")
    print(header_line)

    # timestamp of the first event; set by the print callbacks
    start_ts = 0

    # read events
    b["ipv4_bind_events"].open_perf_buffer(print_ipv4_bind_event)
    b["ipv6_bind_events"].open_perf_buffer(print_ipv6_bind_event)
    while True:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            exit()
515 exit()