Documentation: prctl/seccomp_filter

Documents how system call filtering using Berkeley Packet
Filter programs works and how it may be used.
Includes an example for x86 and a semi-generic
example using a macro-based code generator.

Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Will Drewry <wad@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>

v18: - added acked by
     - update no new privs numbers
v17: - remove @compat note and add Pitfalls section for arch checking
       (keescook@chromium.org)
v16: -
v15: -
v14: - rebase/nochanges
v13: - rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc
v12: - comment on the ptrace_event use
     - update arch support comment
     - note the behavior of SECCOMP_RET_DATA when there are multiple filters
       (keescook@chromium.org)
     - lots of samples/ clean up incl 64-bit bpf-direct support
       (markus@chromium.org)
     - rebase to linux-next
v11: - overhaul return value language, updates (keescook@chromium.org)
     - comment on do_exit(SIGSYS)
v10: - update for SIGSYS
     - update for new seccomp_data layout
     - update for ptrace option use
v9: - updated bpf-direct.c for SIGILL
v8: - add PR_SET_NO_NEW_PRIVS to the samples.
v7: - updated for all the new stuff in v7: TRAP, TRACE
    - only talk about PR_SET_SECCOMP now
    - fixed bad JLE32 check (coreyb@linux.vnet.ibm.com)
    - adds dropper.c: a simple system call disabler
v6: - tweak the language to note the requirement of
      PR_SET_NO_NEW_PRIVS being called prior to use. (luto@mit.edu)
v5: - update sample to use system call arguments
    - adds a "fancy" example using a macro-based generator
    - cleaned up bpf in the sample
    - update docs to mention arguments
    - fix prctl value (eparis@redhat.com)
    - language cleanup (rdunlap@xenotime.net)
v4: - update for no_new_privs use
    - minor tweaks
v3: - call out BPF <-> Berkeley Packet Filter (rdunlap@xenotime.net)
    - document use of tentative always-unprivileged
    - guard sample compilation for i386 and x86_64
v2: - move code to samples (corbet@lwn.net)
Signed-off-by: James Morris <james.l.morris@oracle.com>
diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c
new file mode 100644
index 0000000..c69c347
--- /dev/null
+++ b/samples/seccomp/dropper.c
@@ -0,0 +1,68 @@
+/*
+ * Naive system call dropper built on seccomp_filter.
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Author: Will Drewry <wad@chromium.org>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using prctl(PR_SET_SECCOMP, 2, ...).
+ *
+ * When run, returns the specified errno for the specified
+ * system call number against the given architecture.
+ *
+ * Run this one as root as PR_SET_NO_NEW_PRIVS is not called.
+ */
+
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+static int install_filter(int nr, int arch, int error)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+			 (offsetof(struct seccomp_data, arch))),
+		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, arch, 0, 3),
+		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+			 (offsetof(struct seccomp_data, nr))),
+		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET+BPF_K,
+			 SECCOMP_RET_ERRNO|(error & SECCOMP_RET_DATA)),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+		.filter = filter,
+	};
+	if (prctl(PR_SET_SECCOMP, 2, &prog)) {
+		perror("prctl");
+		return 1;
+	}
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	if (argc < 5) {
+		fprintf(stderr, "Usage:\n"
+			"dropper <syscall_nr> <arch> <errno> <prog> [<args>]\n"
+			"Hint:	AUDIT_ARCH_I386: 0x%X\n"
+			"	AUDIT_ARCH_X86_64: 0x%X\n"
+			"\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);
+		return 1;
+	}
+	if (install_filter(strtol(argv[1], NULL, 0), strtol(argv[2], NULL, 0),
+			   strtol(argv[3], NULL, 0)))
+		return 1;
+	execv(argv[4], &argv[4]);
+	printf("Failed to execv\n");
+	return 255;
+}