RFC: minijail: add libminijail.
Drewry requested an implementation of minijail that:
1) Would be linkable against C programs
2) Not depend on libbase
3) Supply the necessary LD_PRELOAD hacks to use his syscall-filtering framework
without the apply-after-exec hack and to use ptrace-disable.
Thoughts?
BUG=chromium-os:17937
TEST=Adhoc (extremely ;)). Proper test suite to be written; crosbug.com/18834
Change-Id: I8b34557a9a231dad75827c1a3d11f235f712648d
Signed-off-by: Elly Jones <ellyjones@chromium.org>
Reviewed-on: http://gerrit.chromium.org/gerrit/4585
Reviewed-by: Will Drewry <wad@chromium.org>
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0a124ca
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+PRELOADPATH ?= \"/lib/libminijailpreload.so\"
+CFLAGS += -fPIC -Wall -Wextra -Werror -DPRELOADPATH="$(PRELOADPATH)"
+
+all : minijail0 libminijailpreload.so
+
+minijail0 : libminijail.o minijail0.c
+ $(CC) $(CFLAGS) -o $@ $^ -lcap
+
+libminijailpreload.so : libminijailpreload.c libminijail.o
+ $(CC) $(CFLAGS) -shared -o $@ $^ -ldl -lcap
+
+libminijail.o : libminijail.c libminijail.h
+
+install : minijail0 libminijailpreload.so
+	install -D minijail0 $(DESTDIR)/usr/sbin/minijail0
+	install -D libminijailpreload.so $(DESTDIR)/lib/libminijailpreload.so
diff --git a/libminijail-private.h b/libminijail-private.h
new file mode 100644
index 0000000..a304485
--- /dev/null
+++ b/libminijail-private.h
@@ -0,0 +1,15 @@
+/* libminijail-private.h
+ * Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ *
+ * Values shared between libminijailpreload and libminijail, but not visible to
+ * the outside world.
+ */
+
+#ifndef LIBMINIJAIL_PRIVATE_H
+#define LIBMINIJAIL_PRIVATE_H
+
+static const char *kCommandEnvVar = "__MINIJAIL_PRELOAD";
+
+#endif /* !LIBMINIJAIL_PRIVATE_H */
diff --git a/libminijail.c b/libminijail.c
new file mode 100644
index 0000000..3c5ec58
--- /dev/null
+++ b/libminijail.c
@@ -0,0 +1,387 @@
+/* Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file. */
+
+#define _BSD_SOURCE
+#define _GNU_SOURCE
+#include <errno.h>
+#include <grp.h>
+#include <inttypes.h>
+#include <linux/capability.h>
+#include <linux/securebits.h>
+#include <pwd.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/capability.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "libminijail.h"
+#include "libminijail-private.h"
+
+struct minijail {
+  struct {  /* unsigned: a signed 1-bit field cannot hold the value 1 */
+    unsigned int uid : 1;         /* change uid (see 'uid') */
+    unsigned int gid : 1;         /* change gid (see 'gid') */
+    unsigned int caps : 1;        /* restrict capabilities to 'caps' */
+    unsigned int vfs : 1;         /* unshare the mount namespace */
+    unsigned int pids : 1;        /* run in a new pid namespace */
+    unsigned int seccomp : 1;     /* enable seccomp mode 1 */
+    unsigned int readonly : 1;    /* remount /proc read-only */
+    unsigned int usergroups : 1;  /* initgroups() for 'user' */
+    unsigned int ptrace : 1;      /* set by minijail_disable_ptrace() */
+  } flags;
+  uid_t uid;
+  gid_t gid;
+  gid_t usergid;     /* primary gid of 'user', from its passwd entry */
+  const char *user;  /* borrowed from the caller, not copied */
+  uint64_t caps;     /* bit i set => keep capability i */
+  pid_t initpid;     /* child pid recorded by minijail_run() */
+};
+
+static void pdie(const char *failed) {
+ syslog(LOG_ERR, "libminijail: %s failed: %s", failed, strerror(errno));
+ abort();
+}
+
+static void die(const char *failed) {
+ syslog(LOG_ERR, "libminijail: %s", failed);
+ abort();
+}
+
+struct minijail *minijail_new(void) {
+  /* calloc() gives the same result as malloc() + memset(0): every flag starts
+   * cleared, and NULL is passed through when the allocation fails. */
+  struct minijail *jail = calloc(1, sizeof(*jail));
+  return jail;
+}
+
+void minijail_change_uid(struct minijail *j, uid_t uid) {
+ if (uid == 0)
+ die("useless change to uid 0");
+ j->uid = uid;
+ j->flags.uid = 1;
+}
+
+void minijail_change_gid(struct minijail *j, gid_t gid) {
+ if (gid == 0)
+ die("useless change to gid 0");
+ j->gid = gid;
+ j->flags.gid = 1;
+}
+
+int minijail_change_user(struct minijail *j, const char *user) {
+  /* Records the uid and primary gid of 'user' in the jail; 'user' is kept by
+   * reference. Returns 0, or a positive errno (ENOENT if there is no such
+   * user). In principle this should use getpwnam_r(), but: 1) it isn't
+   * actually reentrant anyway, since it uses a statically-allocated file
+   * descriptor internally; 2) fgetpwnam() would solve (1) but doesn't exist;
+   * 3) sysconf() (see getpwnam_r(3)) may return a size that is too small,
+   * which means having to loop on growing the buffer we pass in. */
+  struct passwd *pw;
+  errno = 0;  /* POSIX: errno is left unchanged when there is no such user */
+  pw = getpwnam(user);
+  if (!pw)
+    return errno ? errno : ENOENT;
+  minijail_change_uid(j, pw->pw_uid);
+  j->user = user;
+  j->usergid = pw->pw_gid;
+  return 0;
+}
+
+int minijail_change_group(struct minijail *j, const char *group) {
+  /* Records the gid of 'group' in the jail. Returns 0, or a positive errno
+   * (ENOENT if there is no such group). In principle this should use
+   * getgrnam_r(), but: 1) it isn't actually reentrant anyway, since it uses a
+   * statically-allocated file descriptor internally; 2) fgetgrnam() would
+   * solve (1) but doesn't exist; 3) sysconf() (see getgrnam_r(3)) may return
+   * a size that is too small, which means having to loop on growing the
+   * buffer we pass in. */
+  struct group *gr;
+  errno = 0;  /* POSIX: errno is left unchanged when there is no such group */
+  gr = getgrnam(group);
+  if (!gr)
+    return errno ? errno : ENOENT;
+  minijail_change_gid(j, gr->gr_gid);
+  return 0;
+}
+
+void minijail_use_seccomp(struct minijail *j) {
+ j->flags.seccomp = 1;
+}
+
+void minijail_use_caps(struct minijail *j, uint64_t capmask) {
+ j->caps = capmask;
+ j->flags.caps = 1;
+}
+
+void minijail_namespace_vfs(struct minijail *j) {
+ j->flags.vfs = 1;
+}
+
+void minijail_namespace_pids(struct minijail *j) {
+ j->flags.pids = 1;
+}
+
+void minijail_remount_readonly(struct minijail *j) {
+ j->flags.vfs = 1;
+ j->flags.readonly = 1;
+}
+
+void minijail_inherit_usergroups(struct minijail *j) {
+ j->flags.usergroups = 1;
+}
+
+void minijail_disable_ptrace(struct minijail *j) {
+ j->flags.ptrace = 1;
+}
+
+static int remount_readonly(void) {
+ const char *kProcPath = "/proc";
+ const unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
+ /* Right now, we're holding a reference to our parent's old mount of /proc in
+ * our namespace, which means using MS_REMOUNT here would mutate our parent's
+ * mount as well, even though we're in a VFS namespace (!). Instead, remove
+ * their mount from our namespace and make our own. */
+ if (umount(kProcPath))
+ return errno;
+ if (mount("", kProcPath, "proc", kSafeFlags | MS_RDONLY, ""))
+ return errno;
+ return 0;
+}
+
+/* Shrinks the permitted, effective and inheritable sets to the caps in j->caps
+ * (bit i set => keep cap i) plus CAP_SETPCAP (presumably kept so that the
+ * PR_CAPBSET_DROP calls below work), then trims the bounding set. */
+static void drop_caps(const struct minijail *j) {
+  cap_t caps = cap_get_proc();
+  cap_value_t raise_flag[1];
+  unsigned int i;
+  if (!caps)
+    die("can't get process caps");
+  if (cap_clear(caps))
+    die("can't clear caps");
+  for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
+    /* 64-bit shift: a plain int '1 << i' is undefined for i >= 32. */
+    if (i != CAP_SETPCAP && !(j->caps & ((uint64_t)1 << i)))
+      continue;
+    raise_flag[0] = i;
+    if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
+      die("can't add effective cap");
+    if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
+      die("can't add permitted cap");
+    if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
+      die("can't add inheritable cap");
+  }
+  if (cap_set_proc(caps))
+    die("can't apply cleaned capset");
+  cap_free(caps);
+  for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
+    if (j->caps & ((uint64_t)1 << i))
+      continue;
+    if (prctl(PR_CAPBSET_DROP, i))
+      pdie("prctl(PR_CAPBSET_DROP)");
+  }
+}
+
+void minijail_enter(const struct minijail *j) {
+ if (j->flags.pids)
+ die("tried to enter a pid-namespaced jail; try minijail_run()?");
+
+ if (j->flags.usergroups && !j->user)
+ die("usergroup inheritance without username");
+
+ /* We can't recover from failures if we've dropped privileges partially,
+ * so we don't even try. If any of our operations fail, we abort() the
+ * entire process. */
+ if (j->flags.vfs && unshare(CLONE_NEWNS))
+ pdie("unshare");
+
+ if (j->flags.readonly && remount_readonly())
+ pdie("remount");
+
+ if (j->flags.caps) {
+ /* POSIX capabilities are a bit tricky. If we drop our capability to change
+ * uids, our attempt to use setuid() below will fail. Hang on to root caps
+ * across setuid(), then lock securebits. */
+ if (prctl(PR_SET_KEEPCAPS, 1))
+ pdie("prctl(PR_SET_KEEPCAPS)");
+ if (prctl(PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
+ pdie("prctl(PR_SET_SECUREBITS)");
+ }
+
+ if (j->flags.usergroups && initgroups(j->user, j->usergid))
+ pdie("initgroups");
+ else if (!j->flags.usergroups && setgroups(0, NULL))
+ pdie("setgroups");
+
+ if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
+ pdie("setresgid");
+
+ if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
+ pdie("setresuid");
+
+ if (j->flags.caps)
+ drop_caps(j);
+
+ /* seccomp has to come last since it cuts off all the other
+ * privilege-dropping syscalls :) */
+ if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
+ pdie("prctl(PR_SET_SECCOMP)");
+}
+
+/* Exit code to report: decoded once the root process has been reaped. */
+static volatile sig_atomic_t init_exitstatus = 0;
+
+static void init_term(int __attribute__((unused)) sig) {
+  _exit(init_exitstatus);
+}
+
+static int init(pid_t rootpid) {
+  pid_t pid;
+  int status;
+  signal(SIGTERM, init_term);  /* so that we exit with the right status */
+  while ((pid = wait(&status)) > 0) {
+    /* This loop will only end when either there are no processes left inside
+     * our pid namespace or we get a signal. */
+    if (pid == rootpid)
+      init_exitstatus =
+          WIFEXITED(status) ? WEXITSTATUS(status) : MINIJAIL_ERR_INIT;
+  }
+  _exit(init_exitstatus);
+}
+
+/** @brief Move post-exec commands from the jail into an environment variable
+ * @param j Jail to move commands from.
+ * @return 0 on success, or a negative errno value (-ENOMEM or -E2BIG).
+ *
+ * Serializes post-exec() commands into a string, removes them from the jail,
+ * and adds them to the environment; fake_main() in libminijailpreload.c
+ * deserializes them and applies them inside the execve()'d process.
+ */
+static int move_commands_to_env(struct minijail *j) {
+  const int kEnvBufSize = 256;
+  const char *ptrace = j->flags.ptrace ? "ptrace " : "";
+  const char *seccomp = j->flags.seccomp ? "seccomp " : "";
+  char setuid[64] = "";
+  char caps[32] = "";
+  char *newenv;
+  char *oldenv;
+  char *envbuf = malloc(kEnvBufSize);
+  int r;
+
+  if (!envbuf)
+    return -ENOMEM;
+
+  if (j->flags.caps)
+    snprintf(caps, sizeof(caps), "caps=%" PRIx64 " ", j->caps);
+
+  if (j->flags.uid && j->flags.caps) {
+    snprintf(setuid, sizeof(setuid), "uid=%d ", (int)j->uid);
+    j->flags.uid = 0;
+  }
+
+  j->flags.caps = 0;
+  j->flags.ptrace = 0;
+  j->flags.seccomp = 0;
+
+  r = snprintf(envbuf, kEnvBufSize, "%s%s%s%s", setuid, ptrace, seccomp, caps);
+  if (!r) {
+    /* No commands generated, so no preload needed :) */
+    free(envbuf);
+    return 0;
+  }
+  if (r < 0 || r >= kEnvBufSize) {
+    /* snprintf() returns the untruncated length, so '>=' means truncation. */
+    free(envbuf);
+    return -E2BIG;
+  }
+
+  oldenv = getenv("LD_PRELOAD") ? : "";
+  newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
+  if (!newenv) {
+    free(envbuf);
+    return -ENOMEM;
+  }
+
+  /* Only insert a separating space if we have something to separate... */
+  sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "", PRELOADPATH);
+
+  /* setenv() makes a copy of the string we give it */
+  r = setenv("LD_PRELOAD", newenv, 1) || setenv(kCommandEnvVar, envbuf, 1);
+  free(newenv);
+  free(envbuf);
+  return r ? -ENOMEM : 0;  /* only way setenv() fails with valid names */
+}
+
+int minijail_run(struct minijail *j, const char *filename, char *const argv[]) {
+ unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
+ pid_t r;
+ r = move_commands_to_env(j);
+ if (r)
+ return r;
+
+ r = syscall(SYS_clone, pidns | SIGCHLD, NULL);
+ if (r > 0) {
+ j->initpid = r;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ j->flags.pids = 0;
+
+ /* Jail this process and its descendants... */
+ minijail_enter(j);
+
+ if (pidns) {
+ /* pid namespace: this process will become init inside the new namespace, so
+ * fork off a child to actually run the program (we don't want all programs
+ * we might exec to have to know how to be init). */
+ r = fork();
+ if (r < 0)
+ _exit(r);
+ else if (r > 0)
+ init(r); /* never returns */
+ }
+
+ /* If we aren't pid-namespaced:
+ * calling process
+ * -> execve()-ing process
+ * If we are:
+ * calling process
+ * -> init()-ing process
+ * -> execve()-ing process
+ */
+ _exit(execve(filename, argv, environ));
+}
+
+int minijail_kill(struct minijail *j) {
+ int st;
+ if (kill(j->initpid, SIGTERM))
+ return errno;
+ if (waitpid(j->initpid, &st, 0) < 0)
+ return errno;
+ return st;
+}
+
+int minijail_wait(struct minijail *j) {
+ int st;
+ if (waitpid(j->initpid, &st, 0) < 0)
+ return errno;
+ if (!WIFEXITED(st))
+ return MINIJAIL_ERR_JAIL;
+ return WEXITSTATUS(st);
+}
+
+void minijail_destroy(struct minijail *j) {
+ free(j);
+}
+
diff --git a/libminijail.h b/libminijail.h
new file mode 100644
index 0000000..0df119e
--- /dev/null
+++ b/libminijail.h
@@ -0,0 +1,80 @@
+/* Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file. */
+
+/* The general pattern of use here:
+ * 1) Construct a minijail with minijail_new()
+ * 2) Apply the desired restrictions to it
+ * 3) Enter it, which locks the current process inside it, or:
+ * 3) Run a process inside it
+ * 4) Destroy it.
+ */
+
+#ifndef LIBMINIJAIL_H_
+#define LIBMINIJAIL_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ MINIJAIL_ERR_PRELOAD = 252,
+ MINIJAIL_ERR_JAIL = 253,
+ MINIJAIL_ERR_INIT = 254,
+};
+
+struct minijail;
+
+/* Allocates a new minijail with no restrictions. */
+struct minijail *minijail_new(void);
+
+/* These functions add restrictions to the minijail. They are not applied until
+ * minijail_enter() is called. See the documentation in minijail0.1 for
+ * explanations in detail of what the restrictions do. */
+void minijail_change_uid(struct minijail *j, uid_t uid);
+void minijail_change_gid(struct minijail *j, gid_t gid);
+/* 'user' should be kept valid until minijail_destroy() */
+int minijail_change_user(struct minijail *j, const char *user);
+/* 'group' should be kept valid until minijail_destroy() */
+int minijail_change_group(struct minijail *j, const char *group);
+void minijail_use_seccomp(struct minijail *j);
+void minijail_use_caps(struct minijail *j, uint64_t capmask);
+void minijail_namespace_vfs(struct minijail *j);
+void minijail_namespace_pids(struct minijail *j);
+void minijail_remount_readonly(struct minijail *j);
+void minijail_inherit_usergroups(struct minijail *j);
+void minijail_disable_ptrace(struct minijail *j);
+
+/* Lock this process into the given minijail. Note that this procedure cannot fail,
+ * since there is no way to undo privilege-dropping; therefore, if any part of
+ * the privilege-drop fails, minijail_enter() will abort the entire process.
+ *
+ * Some restrictions cannot be enabled this way (pid namespaces) and attempting
+ * to do so will cause an abort.
+ */
+void minijail_enter(const struct minijail *j);
+
+/* Run the specified command in the given minijail, execve(3)-style. This is
+ * required if minijail_namespace_pids() was used. */
+int minijail_run(struct minijail *j, const char *filename, char *const argv[]);
+
+/* Kill the specified minijail. The minijail must have been created with pid
+ * namespacing; if it was, all processes inside it are atomically killed. */
+int minijail_kill(struct minijail *j);
+
+/* Wait for all processes in the specified minijail to exit. Returns the exit
+ * status of the _first_ process spawned in the jail. */
+int minijail_wait(struct minijail *j);
+
+/* Frees the given minijail. It does not matter if the process is inside the minijail or
+ * not. */
+void minijail_destroy(struct minijail *j);
+
+#ifdef __cplusplus
+}; /* extern "C" */
+#endif
+
+#endif /* !LIBMINIJAIL_H_ */
diff --git a/libminijailpreload.c b/libminijailpreload.c
new file mode 100644
index 0000000..975c335
--- /dev/null
+++ b/libminijailpreload.c
@@ -0,0 +1,146 @@
+/* libminijailpreload.c - preload hack library
+ * Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ *
+ * This library is preloaded into every program launched by minijail_run().
+ * DO NOT EXPORT ANY SYMBOLS FROM THIS LIBRARY. They will replace other symbols
+ * in the programs it is preloaded into and cause impossible-to-debug failures.
+ * See minijail0.1 for a design explanation. */
+
+#include "libminijail.h"
+#include "libminijail-private.h"
+
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <unistd.h>
+
+static int (*real_main)(int, char **, char **) = NULL;
+static void *libc_handle = NULL;
+
+static void die(const char *failed) {
+ syslog(LOG_ERR, "libminijail: %s", failed);
+ abort();
+}
+
+static void unset_in_env(char **envp, const char *name) {
+  size_t i, len = strlen(name);  /* match "name=" exactly, not any prefix */
+  for (i = 0; envp[i]; i++)
+    if (!strncmp(envp[i], name, len) && envp[i][len] == '=')
+      envp[i][0] = '\0';
+}
+
+static void splitarg(char *str, char **key, char **val) {
+ *key = strsep(&str, "=");
+ *val = strsep(&str, "");
+}
+
+/** @brief Fake main(), spliced in before the real call to main() by
+ * __libc_start_main (see below).
+ * We get serialized commands from our invoking process in an environment
+ * variable (kCommandEnvVar): a space-separated list of "key" or "key=value"
+ * items (see move_commands_to_env). We build a jail from them, enter it, and
+ * call the real main(). Unknown items, or caps/uid with no value, are ignored.
+ */
+static int fake_main(int argc, char **argv, char **envp) {
+  char *args = getenv(kCommandEnvVar);
+  char *buf, *rest;
+  char *arg;
+  struct minijail *j;
+  if (geteuid() != getuid() || getegid() != getgid())
+    /* If we didn't do this check, an attacker could set kCommandEnvVar for
+     * any setuid program that uses libminijail to cause it to get capabilities
+     * or a uid it did not expect. */
+    return MINIJAIL_ERR_PRELOAD;
+  if (!args)
+    return MINIJAIL_ERR_PRELOAD;
+  if (!(buf = strdup(args)))
+    die("preload: out of memory");
+  rest = buf;  /* strsep() advances 'rest'; 'buf' is kept for free() */
+  j = minijail_new();
+  if (!j)
+    die("preload: out of memory");
+  while ((arg = strsep(&rest, " "))) {
+    char *key, *val;
+    unsigned long long v;  /* caps is a 64-bit mask, even on 32-bit targets */
+    splitarg(arg, &key, &val);
+    if (!strcmp(key, "caps") && val) {
+      v = strtoull(val, NULL, 16);
+      minijail_use_caps(j, v);
+    }
+    else if (!strcmp(key, "ptrace"))
+      minijail_disable_ptrace(j);
+    else if (!strcmp(key, "uid") && val) {
+      v = atoi(val);
+      minijail_change_uid(j, v);
+    }
+    else if (!strcmp(key, "seccomp"))
+      minijail_use_seccomp(j);
+  }
+  /* TODO(ellyjones): this trashes existing preloads, so one can't do:
+   * LD_PRELOAD="/tmp/test.so libminijailpreload.so" prog; the descendants of
+   * prog will have no LD_PRELOAD set at all. */
+  unset_in_env(envp, "LD_PRELOAD");
+  minijail_enter(j);
+  minijail_destroy(j);
+  free(buf);
+  dlclose(libc_handle);
+  return real_main(argc, argv, envp);
+}
+
+/** @brief LD_PRELOAD override of __libc_start_main.
+ *
+ * It is really best if you do not look too closely at this function.
+ * We need to ensure that some of our code runs before the target program (see
+ * the minijail0.1 file in this directory for high-level details about this), and
+ * the only available place to hook is this function, which is normally
+ * responsible for calling main(). Our LD_PRELOAD will overwrite the real
+ * __libc_start_main with this one, so we have to look up the real one from
+ * libc and invoke it with a pointer to the fake main() we'd like to run before
+ * the real main(). We can't just run our setup code *here* because
+ * __libc_start_main is responsible for setting up the C runtime environment,
+ * so we can't rely on things like malloc() being available yet.
+ */
+
+int __libc_start_main(int (*main) (int, char **, char **),
+ int argc, char ** ubp_av, void (*init) (void),
+ void (*fini) (void), void (*rtld_fini) (void),
+ void (* stack_end)) {
+ void *sym;
+ /* This hack is unfortunately required by C99 - casting directly from void* to
+ * function pointers is left undefined. See POSIX.1-2003, the Rationale for
+ * the specification of dlsym(), and dlsym(3). This deliberately violates
+ * strict-aliasing rules, but gcc can't tell. */
+ union {
+ int (*fn)(int (*main) (int, char **, char **), int argc,
+ char **ubp_av, void (*init) (void), void (*fini) (void),
+ void (*rtld_fini) (void), void (* stack_end));
+ void *symval;
+ } real_libc_start_main;
+
+ /* We hold this handle for the duration of the real __libc_start_main() and
+ * drop it just before calling the real main(). */
+ libc_handle = dlopen("libc.so.6", RTLD_NOW);
+
+ if (!libc_handle) {
+ syslog(LOG_ERR, "can't dlopen() libc");
+ /* We dare not use abort() here because it will run atexit() handlers and
+ * try to flush stdio. */
+ _exit(1);
+ }
+ sym = dlsym(libc_handle, "__libc_start_main");
+ if (!sym) {
+ syslog(LOG_ERR, "can't find the real __libc_start_main()");
+ _exit(1);
+ }
+ real_libc_start_main.symval = sym;
+ real_main = main;
+
+ /* Note that we swap fake_main in for main - fake_main knows that it should
+ * call real_main after it's done. */
+ return real_libc_start_main.fn(fake_main, argc, ubp_av, init, fini, rtld_fini,
+ stack_end);
+}
diff --git a/minijail0.1 b/minijail0.1
new file mode 100644
index 0000000..15ceeca
--- /dev/null
+++ b/minijail0.1
@@ -0,0 +1,71 @@
+.TH MINIJAIL0 "1" "July 2011" "Chromium OS" "User Commands"
+.SH NAME
+minijail0 \- sandbox a process
+.SH SYNOPSIS
+.B minijail0
+[\fIOPTION\fR]... <\fIprogram\fR> [\fIargs\fR]...
+.SH DESCRIPTION
+.PP
+Runs PROGRAM inside a sandbox.
+.TP
+\fB-c <caps>\fR
+Restrict capabilities to \fIcaps\fR. When used in conjunction with \fB-u\fR and
+\fB-g\fR, this allows a program to have access to only certain parts of root's
+default privileges while running as another user and group ID altogether. Note
+that these capabilities are not inherited by subprocesses of the process given
+capabilities unless those subprocesses have POSIX file capabilities. See
+\fBcapabilities\fR(7).
+.TP
+\fB-G\fR
+Inherit all the supplementary groups of the user specified with \fB-u\fR. It
+is an error to use this option without having specified a \fBuser name\fR to
+\fB-u\fR.
+.TP
+\fB-g <group>\fR
+Change groups to \fIgroup\fR, which may be either a group name or a numeric
+group ID.
+.TP
+\fB-h\fR
+Print a help message.
+.TP
+\fB-p\fR
+Run inside a new PID namespace. This option will make it impossible for the
+program to see or affect processes that are not its descendants.
+.TP
+\fB-r\fR
+Remount certain filesystems readonly. Currently this only remounts /proc. This
+implies \fB-v\fR. Remounting /proc readonly means that even if the process has
+write access to a system config knob in /proc (e.g., in /proc/sys/kernel), it cannot
+change the value.
+.TP
+\fB-s\fR
+Enable seccomp(2) in mode 1, which restricts the child process to a very small
+set of system calls. Support for more elaborate syscall filtering is coming.
+.TP
+\fB-u <user>\fR
+Change users to \fIuser\fR, which may be either a user name or a numeric user
+ID.
+.TP
+\fB-v\fR
+Run inside a new VFS namespace. This option makes the program's mountpoints
+independent of the rest of the system's.
+.SH IMPLEMENTATION
+This program is broken up into two parts: \fBminijail0\fR (the frontend) and a helper
+library called \fBlibminijailpreload\fR. Some jailings can only be achieved from
+the process to which they will actually apply - specifically capability use
+(since capabilities are not inherited to an exec'd process unless the exec'd
+process has POSIX file capabilities), seccomp (since we can't exec() once we're
+seccomp'd), and ptrace-disable (which is always cleared on exec()).
+
+To this end, \fBlibminijailpreload\fR is forcibly loaded into all
+dynamically-linked target programs if any of these restrictions are in effect;
+we pass the specific restrictions in an environment variable which the preloaded
+library looks for. The forcibly-loaded library then applies the restrictions
+to the newly-loaded program.
+.SH AUTHOR
+Written by Elly Jones (ellyjones@chromium.org)
+.SH COPYRIGHT
+Copyright \(co 2011 The Chromium OS Authors
+License BSD-like.
+.SH "SEE ALSO"
+\fBlibminijail.h\fR
diff --git a/minijail0.c b/minijail0.c
new file mode 100644
index 0000000..654f332
--- /dev/null
+++ b/minijail0.c
@@ -0,0 +1,110 @@
+/* Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "libminijail.h"
+
+/* -u: accept a decimal uid, otherwise look 'arg' up as a user name. */
+static void set_user(struct minijail *j, const char *arg) {
+  char *end = NULL;
+  long uid = strtol(arg, &end, 10);  /* not strtod(): "1e3"/"1.5" aren't uids */
+  if (!*end && *arg && uid >= 0) {
+    minijail_change_uid(j, uid);
+    return;
+  }
+  if (minijail_change_user(j, arg)) {
+    fprintf(stderr, "Bad user: '%s'\n", arg);
+    exit(1);
+  }
+}
+
+/* -g: accept a decimal gid, otherwise look 'arg' up as a group name. */
+static void set_group(struct minijail *j, const char *arg) {
+  char *end = NULL;
+  long gid = strtol(arg, &end, 10);  /* not strtod(): "1e3"/"1.5" aren't gids */
+  if (!*end && *arg && gid >= 0) {
+    minijail_change_gid(j, gid);
+    return;
+  }
+  if (minijail_change_group(j, arg)) {
+    fprintf(stderr, "Bad group: '%s'\n", arg);
+    exit(1);
+  }
+}
+
+/* -c: parse 'arg' as a hexadecimal capability mask; exits on bad input. */
+static void use_caps(struct minijail *j, const char *arg) {
+  char *end = NULL;
+  uint64_t caps = strtoull(arg, &end, 16);
+  if (*end || !*arg) {  /* reject trailing junk and the empty string */
+    fprintf(stderr, "Invalid cap set: '%s'\n", arg);
+    exit(1);
+  }
+  minijail_use_caps(j, caps);
+}
+
+static void usage(const char *progn) {
+ printf("Usage: %s [-Ghprsv] [-c <caps>] [-g <group>] [-u <user>] <program> [args...]\n"
+ " -c: restrict caps to <caps>\n"
+ " -G: inherit groups from uid\n"
+ " -g: change gid to <group>\n"
+ " -h: help (this message)\n"
+ " -p: use pid namespace\n"
+ " -r: remount filesystems readonly (implies -v)\n"
+ " -s: use seccomp\n"
+ " -u: change uid to <user>\n"
+ " -v: use vfs namespace\n", progn);
+}
+
+int main(int argc, char *argv[]) {
+  struct minijail *j = minijail_new();
+  int opt;
+  if (!j)
+    return 1;  /* out of memory */
+  while ((opt = getopt(argc, argv, "u:g:sc:vrGhp")) != -1) {
+    switch (opt) {
+      case 'u':
+        set_user(j, optarg);
+        break;
+      case 'g':
+        set_group(j, optarg);
+        break;
+      case 's':
+        minijail_use_seccomp(j);
+        break;
+      case 'c':
+        use_caps(j, optarg);
+        break;
+      case 'v':
+        minijail_namespace_vfs(j);
+        break;
+      case 'r':
+        minijail_remount_readonly(j);
+        break;
+      case 'G':
+        minijail_inherit_usergroups(j);
+        break;
+      case 'p':
+        minijail_namespace_pids(j);
+        break;
+      default:
+        usage(argv[0]);
+        exit(1);
+    }
+  }
+
+  if (argc == optind) {
+    usage(argv[0]);
+    exit(1);
+  }
+
+  argv += optind;
+  if (minijail_run(j, argv[0], argv))
+    return 1;  /* the jailed child was never started */
+  return minijail_wait(j);
+}