tools: add filtering by mount namespace
In previous patches, I added the option --cgroupmap to filter events
belonging to a set of cgroup-v2 cgroups. Although this approach works fine
with systemd services and containers when cgroup-v2 is enabled, it does not
work with containers when only cgroup-v1 is enabled because
bpf_get_current_cgroup_id() only works with cgroup-v2. It also requires
Linux 4.18 or later to get this bpf helper function.
This patch adds an additional way to filter by containers, using mount
namespaces.
Note that this does not help with systemd services since they normally
don't create a new mount namespace (unless you set some options like
'ReadOnlyPaths=', see "man 5 systemd.exec").
My goal with this patch is to filter Kubernetes pods, even on
distributions with an older kernel (<4.18) or without cgroup-v2 enabled.
- This is only implemented for tools that already support filtering by
cgroup id (bindsnoop, capable, execsnoop, profile, tcpaccept, tcpconnect,
tcptop and tcptracer).
- I picked the mount namespace because the other namespaces could be
disabled in Kubernetes (e.g. HostNetwork, HostPID, HostIPC).
It can be tested by following the example in docs/special_filtering added
in this commit. To avoid compiling locally, the following commands can be
used:
```
sudo bpftool map create /sys/fs/bpf/mnt_ns_set type hash key 8 value 4 \
entries 128 name mnt_ns_set flags 0
docker run -ti --rm --privileged \
-v /usr/src:/usr/src -v /lib/modules:/lib/modules \
-v /sys/fs/bpf:/sys/fs/bpf --pid=host kinvolk/bcc:alban-containers-filters \
/usr/share/bcc/tools/execsnoop --mntnsmap /sys/fs/bpf/mnt_ns_set
```
Co-authored-by: Alban Crequy <alban@kinvolk.io>
Co-authored-by: Mauricio Vásquez <mauricio@kinvolk.io>
diff --git a/tools/tcptop.py b/tools/tcptop.py
index 9fb3ca2..510c4e8 100755
--- a/tools/tcptop.py
+++ b/tools/tcptop.py
@@ -26,6 +26,7 @@
from __future__ import print_function
from bcc import BPF
+from bcc.containers import filter_by_containers
import argparse
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
@@ -45,7 +46,8 @@
./tcptop # trace TCP send/recv by host
./tcptop -C # don't clear the screen
./tcptop -p 181 # only trace PID 181
- ./tcptop --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcptop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./tcptop --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Summarize TCP send/recv throughput by host",
@@ -63,6 +65,8 @@
help="number of outputs")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
@@ -98,21 +102,16 @@
BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
struct msghdr *msg, size_t size)
{
- u32 pid = bpf_get_current_pid_tgid() >> 32;
- FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
+
+ u32 pid = bpf_get_current_pid_tgid() >> 32;
+ FILTER_PID
+
u16 dport = 0, family = sk->__sk_common.skc_family;
if (family == AF_INET) {
@@ -148,14 +147,13 @@
*/
int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
{
- u32 pid = bpf_get_current_pid_tgid() >> 32;
- FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
+
+ u32 pid = bpf_get_current_pid_tgid() >> 32;
+ FILTER_PID
+
u16 dport = 0, family = sk->__sk_common.skc_family;
u64 *val, zero = 0;
@@ -190,15 +188,11 @@
# code substitutions
if args.pid:
- bpf_text = bpf_text.replace('FILTER',
+ bpf_text = bpf_text.replace('FILTER_PID',
'if (pid != %s) { return 0; }' % args.pid)
else:
- bpf_text = bpf_text.replace('FILTER', '')
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+ bpf_text = bpf_text.replace('FILTER_PID', '')
+bpf_text = filter_by_containers(args) + bpf_text
if debug or args.ebpf:
print(bpf_text)
if args.ebpf: