Omar Sandoval | e822a81 | 2016-10-16 12:31:32 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # mountsnoop Trace mount() and umount() syscalls. |
| 4 | # For Linux, uses BCC, eBPF. Embedded C. |
| 5 | # |
| 6 | # USAGE: mountsnoop [-h] |
| 7 | # |
| 8 | # Copyright (c) 2016 Facebook, Inc. |
| 9 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 10 | # |
| 11 | # 14-Oct-2016 Omar Sandoval Created this. |
| 12 | |
| 13 | from __future__ import print_function |
| 14 | import argparse |
| 15 | import bcc |
| 16 | import ctypes |
| 17 | import errno |
| 18 | import functools |
| 19 | import sys |
| 20 | |
| 21 | |
# BPF program (C source) handed to bcc.BPF(text=...) in main().
#
# It defines entry and return probes for sys_mount and sys_umount.  Every
# probe reports through the "events" perf buffer using one fixed-size
# struct data_t per record:
#   * the entry probes first submit an EVENT_MOUNT / EVENT_UMOUNT record
#     (comm, flags, mount namespace inode number) and then one extra record
#     per string argument, because only one MAX_STR_LEN buffer fits in a
#     record;
#   * the return probes submit a single *_RET record carrying the syscall
#     return value.
#
# The Python side reassembles the records per thread id in print_event().
# enum event_type, struct data_t and MAX_STR_LEN are mirrored by hand in
# EventType, Event/DataUnion/EnterData and MAX_STR_LEN below, so any change
# here must be repeated there.
#
# NOTE(review): main() never attaches the probes explicitly; this relies on
# BCC's kprobe__<fn> / kretprobe__<fn> naming convention -- confirm against
# the BCC reference guide for the kernel in use.
bpf_text = r"""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#include <linux/nsproxy.h>
#include <linux/ns_common.h>

/*
 * XXX: struct mnt_namespace is defined in fs/mount.h, which is private to the
 * VFS and not installed in any kernel-devel packages. So, let's duplicate the
 * important part of the definition. There are actually more members in the
 * real struct, but we don't need them, and they're more likely to change.
 */
struct mnt_namespace {
    atomic_t count;
    struct ns_common ns;
};

/*
 * XXX: this could really use first-class string support in BPF. target is a
 * NUL-terminated path up to PATH_MAX in length. source and type are
 * NUL-terminated strings up to PAGE_SIZE in length. data is a weird case: it's
 * almost always a NUL-terminated string, but for some filesystems (e.g., older
 * NFS variants), it's a binary structure with plenty of NUL bytes, so the
 * kernel always copies up to PAGE_SIZE bytes, stopping when it hits a fault.
 *
 * The best we can do with the existing BPF helpers is to copy as much of each
 * argument as we can. Our stack space is limited, and we need to leave some
 * headroom for the rest of the function, so this should be a decent value.
 */
#define MAX_STR_LEN 412

enum event_type {
    EVENT_MOUNT,
    EVENT_MOUNT_SOURCE,
    EVENT_MOUNT_TARGET,
    EVENT_MOUNT_TYPE,
    EVENT_MOUNT_DATA,
    EVENT_MOUNT_RET,
    EVENT_UMOUNT,
    EVENT_UMOUNT_TARGET,
    EVENT_UMOUNT_RET,
};

struct data_t {
    enum event_type type;
    pid_t pid, tgid;
    union {
        /* EVENT_MOUNT, EVENT_UMOUNT */
        struct {
            /* current->nsproxy->mnt_ns->ns.inum */
            unsigned int mnt_ns;
            char comm[TASK_COMM_LEN];
            unsigned long flags;
        } enter;
        /*
         * EVENT_MOUNT_SOURCE, EVENT_MOUNT_TARGET, EVENT_MOUNT_TYPE,
         * EVENT_MOUNT_DATA, EVENT_UMOUNT_TARGET
         */
        char str[MAX_STR_LEN];
        /* EVENT_MOUNT_RET, EVENT_UMOUNT_RET */
        int retval;
    };
};

BPF_PERF_OUTPUT(events);

int kprobe__sys_mount(struct pt_regs *ctx, char __user *source,
                      char __user *target, char __user *type,
                      unsigned long flags)
{
    /* sys_mount takes too many arguments */
    char __user *data = (char __user *)PT_REGS_PARM5(ctx);
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_MOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_SOURCE;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), source);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TARGET;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TYPE;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), type);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_DATA;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), data);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int kretprobe__sys_mount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_MOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int kprobe__sys_umount(struct pt_regs *ctx, char __user *target, int flags)
{
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_UMOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_UMOUNT_TARGET;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int kretprobe__sys_umount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_UMOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}
"""
| 188 | |
# Flag definitions taken from sys/mount.h.
#
# MS_MGC_VAL is compared against the bits selected by MS_MGC_MSK in
# decode_mount_flags(); the remaining bits are looked up in MOUNT_FLAGS.
MS_MGC_VAL = 0xc0ed0000
MS_MGC_MSK = 0xffff0000

# (name, bit) pairs for mount(2), in ascending bit order.  Bits 8 and 9 and
# bits 26-29 have no entry here and are reported as raw hex when set.
MOUNT_FLAGS = [
    ('MS_RDONLY', 1 << 0),
    ('MS_NOSUID', 1 << 1),
    ('MS_NODEV', 1 << 2),
    ('MS_NOEXEC', 1 << 3),
    ('MS_SYNCHRONOUS', 1 << 4),
    ('MS_REMOUNT', 1 << 5),
    ('MS_MANDLOCK', 1 << 6),
    ('MS_DIRSYNC', 1 << 7),
    ('MS_NOATIME', 1 << 10),
    ('MS_NODIRATIME', 1 << 11),
    ('MS_BIND', 1 << 12),
    ('MS_MOVE', 1 << 13),
    ('MS_REC', 1 << 14),
    ('MS_SILENT', 1 << 15),
    ('MS_POSIXACL', 1 << 16),
    ('MS_UNBINDABLE', 1 << 17),
    ('MS_PRIVATE', 1 << 18),
    ('MS_SLAVE', 1 << 19),
    ('MS_SHARED', 1 << 20),
    ('MS_RELATIME', 1 << 21),
    ('MS_KERNMOUNT', 1 << 22),
    ('MS_I_VERSION', 1 << 23),
    ('MS_STRICTATIME', 1 << 24),
    ('MS_LAZYTIME', 1 << 25),
    ('MS_ACTIVE', 1 << 30),
    ('MS_NOUSER', 1 << 31),
]

# (name, bit) pairs for umount2(2).
UMOUNT_FLAGS = [
    ('MNT_FORCE', 1 << 0),
    ('MNT_DETACH', 1 << 1),
    ('MNT_EXPIRE', 1 << 2),
    ('UMOUNT_NOFOLLOW', 1 << 3),
]


# Sizes shared with the BPF program; both must match the C side exactly.
TASK_COMM_LEN = 16  # linux/sched.h
MAX_STR_LEN = 412  # same value as "#define MAX_STR_LEN" in bpf_text
| 230 | |
| 231 | |
class EventType(object):
    """Numeric record types, in the same order as enum event_type in bpf_text.

    The values are consecutive integers starting at 0, matching how the C
    enum is declared there (no explicit initializers).
    """

    (EVENT_MOUNT,
     EVENT_MOUNT_SOURCE,
     EVENT_MOUNT_TARGET,
     EVENT_MOUNT_TYPE,
     EVENT_MOUNT_DATA,
     EVENT_MOUNT_RET,
     EVENT_UMOUNT,
     EVENT_UMOUNT_TARGET,
     EVENT_UMOUNT_RET) = range(9)
| 242 | |
| 243 | |
class EnterData(ctypes.Structure):
    # Mirror of the "enter" member of struct data_t in bpf_text, carried by
    # EVENT_MOUNT and EVENT_UMOUNT records.  Field order and types must stay
    # in step with the C declaration.
    _fields_ = [
        # current->nsproxy->mnt_ns->ns.inum on the C side
        ('mnt_ns', ctypes.c_uint),
        # task name filled in by bpf_get_current_comm()
        ('comm', ctypes.c_char * TASK_COMM_LEN),
        # raw flags argument of the traced syscall
        ('flags', ctypes.c_ulong),
    ]
| 250 | |
| 251 | |
class DataUnion(ctypes.Union):
    # Mirror of the anonymous union inside struct data_t in bpf_text.  Which
    # member is meaningful depends on the record's event type.
    _fields_ = [
        # EVENT_MOUNT, EVENT_UMOUNT
        ('enter', EnterData),
        # EVENT_MOUNT_SOURCE/TARGET/TYPE/DATA, EVENT_UMOUNT_TARGET
        ('str', ctypes.c_char * MAX_STR_LEN),
        # EVENT_MOUNT_RET, EVENT_UMOUNT_RET
        ('retval', ctypes.c_int),
    ]
| 258 | |
| 259 | |
class Event(ctypes.Structure):
    # Mirror of struct data_t in bpf_text; print_event() casts the raw perf
    # buffer pointer to this layout.
    _fields_ = [
        # one of the EventType values
        ('type', ctypes.c_uint),
        # low 32 bits of bpf_get_current_pid_tgid() (printed as TID)
        ('pid', ctypes.c_uint),
        # high 32 bits of bpf_get_current_pid_tgid() (printed as PID)
        ('tgid', ctypes.c_uint),
        # payload; the C union is anonymous, here it is named "union"
        ('union', DataUnion),
    ]
| 267 | |
| 268 | |
def _decode_flags(flags, flag_list):
    """Split a flags word into a list of symbolic names.

    flag_list is an iterable of (name, bit) pairs, checked in order.  Each
    bit that is set contributes its name and is then cleared, so a later
    entry covering the same bit is not reported again.  Any bits left over
    are appended as one hexadecimal literal; if nothing matched at all the
    result is just that literal (e.g. ['0x0']).
    """
    names = []
    remaining = flags
    for name, mask in flag_list:
        if not remaining & mask:
            continue
        names.append(name)
        remaining &= ~mask
    if remaining or not names:
        names.append('0x{:x}'.format(remaining))
    return names
| 278 | |
| 279 | |
def decode_flags(flags, flag_list):
    """Render flags as a '|'-separated string using the (name, bit) pairs
    in flag_list; see _decode_flags() for how bits are matched."""
    parts = _decode_flags(flags, flag_list)
    return '|'.join(parts)
| 282 | |
| 283 | |
def decode_mount_flags(flags):
    """Render the flags argument of mount() as a '|'-separated string.

    When the bits selected by MS_MGC_MSK equal MS_MGC_VAL they are reported
    as the single token 'MS_MGC_VAL' and removed before the remaining bits
    are decoded against MOUNT_FLAGS.
    """
    prefix = []
    if (flags & MS_MGC_MSK) == MS_MGC_VAL:
        prefix = ['MS_MGC_VAL']
        flags &= ~MS_MGC_MSK
    return '|'.join(prefix + _decode_flags(flags, MOUNT_FLAGS))
| 291 | |
| 292 | |
def decode_umount_flags(flags):
    """Render the flags argument of umount() as a '|'-separated string of
    UMOUNT_FLAGS names (unknown bits appear as a hex literal)."""
    return '|'.join(_decode_flags(flags, UMOUNT_FLAGS))
| 295 | |
| 296 | |
def decode_errno(retval):
    """Format a syscall return value for display.

    If -retval is a known errno number the result is the symbolic name
    with a leading '-', e.g. '-EPERM'.  Anything else (including 0) is
    returned as its plain decimal string.
    """
    name = errno.errorcode.get(-retval)
    if name is None:
        return str(retval)
    return '-' + name
| 302 | |
| 303 | |
# Byte value -> C-style escape sequence for characters that get a
# dedicated escape instead of being printed raw or as \xNN.
_escape_chars = {
    0x07: '\\a',   # bell
    0x08: '\\b',   # backspace
    0x09: '\\t',   # horizontal tab
    0x0a: '\\n',   # newline
    0x0b: '\\v',   # vertical tab
    0x0c: '\\f',   # form feed
    0x0d: '\\r',   # carriage return
    0x22: '\\"',   # double quote
    0x5c: '\\\\',  # backslash
}


def escape_character(c):
    """Return the printable representation of the byte value c (an int).

    Bytes listed in _escape_chars use their named escape, other printable
    ASCII (0x20-0x7e) is returned as the character itself, and everything
    else becomes a two-digit '\\xNN' escape.
    """
    named = _escape_chars.get(c)
    if named is not None:
        return named
    if 0x20 <= c <= 0x7e:
        return chr(c)
    return '\\x{:02x}'.format(c)
| 325 | |
| 326 | |
# decode_mount_string(s) renders a byte string as a double-quoted literal
# with every byte passed through escape_character().  Two definitions are
# needed because iterating a byte string yields ints on Python 3 but
# one-character strings on Python 2.
if sys.version_info.major >= 3:
    def decode_mount_string(s):
        escaped = [escape_character(byte) for byte in s]
        return '"' + ''.join(escaped) + '"'
else:
    def decode_mount_string(s):
        escaped = [escape_character(ord(ch)) for ch in s]
        return '"' + ''.join(escaped) + '"'
| 333 | |
| 334 | |
def print_event(mounts, umounts, cpu, data, size):
    """Perf-buffer callback: reassemble one syscall from its records.

    The BPF program reports each mount()/umount() call as several records
    (entry, one per string argument, return).  Records are keyed by thread
    id (event.pid) and accumulated until the *_RET record arrives, at which
    point one output line is printed and the entry is dropped.

    mounts  -- dict of in-flight mount() calls, keyed by thread id; mutated
    umounts -- dict of in-flight umount() calls, keyed by thread id; mutated
    cpu     -- unused (part of the callback signature)
    data    -- pointer to the raw record, laid out as Event
    size    -- unused (part of the callback signature)
    """
    event = ctypes.cast(data, ctypes.POINTER(Event)).contents

    try:
        if event.type == EventType.EVENT_MOUNT:
            mounts[event.pid] = {
                'pid': event.pid,
                'tgid': event.tgid,
                'mnt_ns': event.union.enter.mnt_ns,
                'comm': event.union.enter.comm,
                'flags': event.union.enter.flags,
            }
        elif event.type == EventType.EVENT_MOUNT_SOURCE:
            mounts[event.pid]['source'] = event.union.str
        elif event.type == EventType.EVENT_MOUNT_TARGET:
            mounts[event.pid]['target'] = event.union.str
        elif event.type == EventType.EVENT_MOUNT_TYPE:
            mounts[event.pid]['type'] = event.union.str
        elif event.type == EventType.EVENT_MOUNT_DATA:
            # XXX: data is not always a NUL-terminated string
            mounts[event.pid]['data'] = event.union.str
        elif event.type == EventType.EVENT_UMOUNT:
            umounts[event.pid] = {
                'pid': event.pid,
                'tgid': event.tgid,
                'mnt_ns': event.union.enter.mnt_ns,
                'comm': event.union.enter.comm,
                'flags': event.union.enter.flags,
            }
        elif event.type == EventType.EVENT_UMOUNT_TARGET:
            umounts[event.pid]['target'] = event.union.str
        elif (event.type == EventType.EVENT_MOUNT_RET or
              event.type == EventType.EVENT_UMOUNT_RET):
            if event.type == EventType.EVENT_MOUNT_RET:
                syscall = mounts.pop(event.pid)
                call = ('mount({source}, {target}, {type}, {flags}, {data}) ' +
                        '= {retval}').format(
                    source=decode_mount_string(syscall['source']),
                    target=decode_mount_string(syscall['target']),
                    type=decode_mount_string(syscall['type']),
                    flags=decode_mount_flags(syscall['flags']),
                    data=decode_mount_string(syscall['data']),
                    retval=decode_errno(event.union.retval))
            else:
                syscall = umounts.pop(event.pid)
                call = 'umount({target}, {flags}) = {retval}'.format(
                    target=decode_mount_string(syscall['target']),
                    flags=decode_umount_flags(syscall['flags']),
                    retval=decode_errno(event.union.retval))
            # A task name is arbitrary bytes; a strict decode would raise
            # UnicodeDecodeError (not caught below) and abort the callback,
            # so undecodable bytes are replaced instead.
            comm = syscall['comm'].decode('utf-8', 'replace')
            print('{:16} {:<7} {:<7} {:<11} {}'.format(
                comm, syscall['tgid'], syscall['pid'],
                syscall['mnt_ns'], call))
    except KeyError:
        # This might happen if we lost an event.
        pass
| 390 | |
| 391 | |
def main():
    """Load the BPF program and print mount()/umount() calls until stopped.

    Parses the command line (only -h/--help is accepted), compiles
    bpf_text with BCC, registers print_event() on the "events" perf
    buffer, prints the column header and then polls for events until the
    user interrupts with Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description='trace mount() and umount() syscalls'
    )
    # There are no options to read back; parsing still provides -h and
    # rejects unexpected arguments.
    parser.parse_args()

    # In-flight syscalls, keyed by thread id; filled in by print_event().
    mounts = {}
    umounts = {}
    b = bcc.BPF(text=bpf_text)
    b['events'].open_perf_buffer(
        functools.partial(print_event, mounts, umounts))
    print('{:16} {:<7} {:<7} {:<11} {}'.format(
        'COMM', 'PID', 'TID', 'MNT_NS', 'CALL'))
    try:
        while True:
            b.kprobe_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing; leave quietly instead
        # of dumping a traceback.
        pass
| 407 | |
| 408 | |
# Run the tracer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()