add an option to set up a minimal /dev
(high level design)
This will unmount the outside /dev to help remove access to existing
device nodes, and then set up a small tmpfs over /dev with a minimal
set of safe/common nodes. Most daemons do not need more than these.
(low level details)
In order to support common use cases like passing in one or two extra
device nodes (e.g. /dev/log), we cannot set up the final /dev directly.
We create a temporary directory, mount the tmpfs and create all our
nodes in there, then redirect any bind mounts whose destination is under
/dev to that temporary path. Then just before we chroot or pivot, we
move the mount from its temp location to the final /dev.
Bug: chromium:680859
Test: `sudo ./minijail0 -rvpd /bin/ls -l /dev/` shows reduced # of files
Test: `sudo ./minijail0 -rvpd /bin/grep /dev /proc/mounts` shows only one /dev mount
Test: `sudo ./minijail0 -rvpd -C / -b /dev/log,/dev/log /bin/logger asdf` allows logging to work
Test: `sudo ./minijail0 -rvpd -C /var/empty -b /,/ -b /dev/mixer,/dev/mixer /bin/ls -l /dev/` shows /dev/mixer
Change-Id: Ie91869971baffde0b86c5d244c584e45099abe7e
diff --git a/libminijail.c b/libminijail.c
index f2a937b..0db5adc 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -26,6 +26,7 @@
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
@@ -142,6 +143,7 @@
int seccomp_filter_logging : 1;
int chroot : 1;
int pivot_root : 1;
+ int mount_dev : 1;
int mount_tmp : 1;
int do_init : 1;
int run_as_init : 1;
@@ -607,6 +609,11 @@
return j->tmpfs_size;
}
+/*
+ * Request a minimal /dev for the jail by setting the mount_dev flag on |j|.
+ * Only the flag is recorded here; the /dev setup itself happens later, where
+ * the flag is checked.
+ */
+void API minijail_mount_dev(struct minijail *j)
+{
+ j->flags.mount_dev = 1;
+}
+
void API minijail_mount_tmp(struct minijail *j)
{
minijail_mount_tmp_size(j, 64 * 1024 * 1024);
@@ -1183,6 +1190,152 @@
return ret;
}
+/* Describes one device node to create inside the minimal /dev. */
+struct dev_spec {
+	const char *name;	/* Node name, relative to the new /dev. */
+	mode_t mode;		/* File type and permission bits for mknodat. */
+	dev_t major, minor;	/* Device numbers, combined with makedev(). */
+};
+
+/* Device nodes populated into the minimal /dev. */
+static const struct dev_spec device_nodes[] = {
+	{ .name = "null", .mode = S_IFCHR | 0666, .major = 1, .minor = 3 },
+	{ .name = "zero", .mode = S_IFCHR | 0666, .major = 1, .minor = 5 },
+	{ .name = "full", .mode = S_IFCHR | 0666, .major = 1, .minor = 7 },
+	{ .name = "urandom", .mode = S_IFCHR | 0444, .major = 1, .minor = 9 },
+	{ .name = "tty", .mode = S_IFCHR | 0666, .major = 5, .minor = 0 },
+};
+
+/*
+ * Describes one symlink to create inside the minimal /dev.  Note the naming:
+ * |source| is the name of the link itself and |dest| is what it points at.
+ */
+struct dev_sym_spec {
+	const char *source;
+	const char *dest;
+};
+
+/* Symlinks populated into the minimal /dev. */
+static const struct dev_sym_spec device_symlinks[] = {
+	{ .source = "ptmx", .dest = "pts/ptmx" },
+	{ .source = "fd", .dest = "/proc/self/fd" },
+	{ .source = "stdin", .dest = "fd/0" },
+	{ .source = "stdout", .dest = "fd/1" },
+	{ .source = "stderr", .dest = "fd/2" },
+};
+
+/*
+ * Clean up the temporary dev path we had set up previously.  In case of
+ * errors, we don't want to go leaking empty tempdirs.
+ *
+ * @dev_path Heap-allocated temp path.  It is freed here, so the caller must
+ *           not use it afterwards.  NULL is accepted and ignored.
+ *
+ * Cleanup is best effort: failures of umount2()/rmdir() are ignored.  errno
+ * is restored on return so that a caller which reports the error that led
+ * here still sees the original cause rather than a cleanup failure.
+ */
+static void mount_dev_cleanup(char *dev_path)
+{
+	int saved_errno = errno;
+
+	if (dev_path == NULL)
+		return;
+
+	umount2(dev_path, MNT_DETACH);
+	rmdir(dev_path);
+	free(dev_path);
+
+	errno = saved_errno;
+}
+
+/*
+ * Set up the pseudo /dev path at the temporary location.
+ * See mount_dev_finalize for more details.
+ *
+ * @dev_path_ret On success, set to the heap-allocated temp path that holds
+ *               the new tmpfs; the caller owns it.  On failure, set to NULL
+ *               (the temp path has already been cleaned up).
+ *
+ * Returns 0 for success, non-zero on failure with errno left as set by the
+ * step that failed.  Dies if the temp directory itself cannot be created.
+ */
+static int mount_dev(char **dev_path_ret)
+{
+	int ret;
+	int saved_errno;
+	int dev_fd = -1;
+	size_t i;
+	mode_t mask;
+	char *dev_path;
+
+	/*
+	 * Create a temp path for the /dev init.  We'll relocate this to the
+	 * final location later on in the startup process.
+	 */
+	dev_path = *dev_path_ret = strdup("/tmp/minijail.dev.XXXXXX");
+	if (dev_path == NULL || mkdtemp(dev_path) == NULL)
+		pdie("could not create temp path for /dev");
+
+	/* Set up the empty /dev mount point first. */
+	ret = mount("minijail-devfs", dev_path, "tmpfs",
+		    MS_NOEXEC | MS_NOSUID, "size=5M,mode=755");
+	if (ret) {
+		/* Nothing is mounted yet, so only the directory goes away. */
+		saved_errno = errno;
+		rmdir(dev_path);
+		free(dev_path);
+		*dev_path_ret = NULL;
+		errno = saved_errno;
+		return ret;
+	}
+
+	/* We want to set the mode directly from the spec. */
+	mask = umask(0);
+
+	/* Get a handle to the temp dev path for *at funcs below. */
+	dev_fd = open(dev_path, O_DIRECTORY | O_PATH | O_CLOEXEC);
+	if (dev_fd < 0) {
+		ret = 1;
+		goto done;
+	}
+
+	/* Create all the nodes in /dev. */
+	for (i = 0; i < ARRAY_SIZE(device_nodes); ++i) {
+		const struct dev_spec *ds = &device_nodes[i];
+		ret = mknodat(dev_fd, ds->name, ds->mode,
+			      makedev(ds->major, ds->minor));
+		if (ret)
+			goto done;
+	}
+
+	/* Create all the symlinks in /dev. */
+	for (i = 0; i < ARRAY_SIZE(device_symlinks); ++i) {
+		const struct dev_sym_spec *ds = &device_symlinks[i];
+		ret = symlinkat(ds->dest, dev_fd, ds->source);
+		if (ret)
+			goto done;
+	}
+
+done:
+	/* Keep the errno of the failing call across the teardown below. */
+	saved_errno = errno;
+
+	/* The open itself may be what failed, so only close a valid fd. */
+	if (dev_fd >= 0)
+		close(dev_fd);
+
+	/* Restore old mask. */
+	umask(mask);
+
+	if (ret) {
+		/* This frees |dev_path|; don't hand a dangling pointer back. */
+		mount_dev_cleanup(dev_path);
+		*dev_path_ret = NULL;
+	}
+
+	errno = saved_errno;
+	return ret;
+}
+
+/*
+ * Relocate the temporary /dev mount to its final /dev place.
+ * We have to do this two step process so people can bind mount extra
+ * /dev paths like /dev/log.
+ *
+ * @j        Minijail whose chrootdir (if set) prefixes the final /dev path
+ * @dev_path Temp path set up by mount_dev.  It is always cleaned up and
+ *           freed here, on success and on failure alike, so the caller must
+ *           not use or release it afterwards.
+ *
+ * Returns 0 for success, -1 on failure with errno left as set by the step
+ * that failed.
+ */
+static int mount_dev_finalize(const struct minijail *j, char *dev_path)
+{
+	int ret = -1;
+	int saved_errno;
+	char *dest = NULL;
+
+	/*
+	 * Detach the existing /dev mount.  This is not best effort: if it
+	 * cannot be unmounted we fail instead of stacking on top of it.
+	 */
+	if (umount2("/dev", MNT_DETACH))
+		goto done;
+
+	if (asprintf(&dest, "%s/dev", j->chrootdir ? : "") < 0) {
+		/* |dest| is undefined after a failed asprintf; don't free it. */
+		dest = NULL;
+		goto done;
+	}
+
+	if (mount(dev_path, dest, NULL, MS_MOVE, NULL))
+		goto done;
+
+	ret = 0;
+done:
+	/* Keep the errno of the failing call across the teardown below. */
+	saved_errno = errno;
+	free(dest);
+	mount_dev_cleanup(dev_path);
+	errno = saved_errno;
+
+	return ret;
+}
+
/*
* mount_one: Applies mounts from @m for @j, recursing as needed.
* @j Minijail these mounts are for
@@ -1190,19 +1343,29 @@
*
* Returns 0 for success.
*/
-static int mount_one(const struct minijail *j, struct mountpoint *m)
+static int mount_one(const struct minijail *j, struct mountpoint *m,
+ const char *dev_path)
{
int ret;
char *dest;
int remount_ro = 0;
- /* |dest| has a leading "/". */
- if (asprintf(&dest, "%s%s", j->chrootdir, m->dest) < 0)
- return -ENOMEM;
+ /*
+ * We assume |dest| has a leading "/".  When |dev_path| is set, mounts
+ * whose destination is under /dev/ are redirected into that temp path
+ * instead of the chroot dir.
+ */
+ if (dev_path && strncmp("/dev/", m->dest, 5) == 0) {
+ /* Since the temp path is rooted at /dev, skip that dest part. */
+ if (asprintf(&dest, "%s%s", dev_path, m->dest + 4) < 0)
+ return -ENOMEM;
+ } else {
+ if (asprintf(&dest, "%s%s", j->chrootdir, m->dest) < 0)
+ return -ENOMEM;
+ }
- if (setup_mount_destination(m->src, dest, j->uid, j->gid,
- (m->flags & MS_BIND)))
- pdie("creating mount target '%s' failed", dest);
+ ret = setup_mount_destination(m->src, dest, j->uid, j->gid,
+ (m->flags & MS_BIND));
+ if (ret) {
+ pwarn("creating mount target '%s' failed", dest);
+ /* We return instead of dying now, so |dest| must not leak. */
+ free(dest);
+ return ret;
+ }
/*
* R/O bind mounts have to be remounted since 'bind' and 'ro'
@@ -1215,28 +1378,39 @@
}
ret = mount(m->src, dest, m->type, m->flags, m->data);
- if (ret)
- pdie("mount: %s -> %s", m->src, dest);
+ if (ret) {
+ pwarn("mount: %s -> %s", m->src, dest);
+ /* We return instead of dying now, so |dest| must not leak. */
+ free(dest);
+ return ret;
+ }
if (remount_ro) {
m->flags |= MS_RDONLY;
ret = mount(m->src, dest, NULL,
m->flags | MS_REMOUNT, m->data);
- if (ret)
- pdie("bind ro: %s -> %s", m->src, dest);
+ if (ret) {
+ pwarn("bind ro: %s -> %s", m->src, dest);
+ /* We return instead of dying now, so |dest| must not leak. */
+ free(dest);
+ return ret;
+ }
}
free(dest);
if (m->next)
- return mount_one(j, m->next);
+ return mount_one(j, m->next, dev_path);
return ret;
}
-static int enter_chroot(const struct minijail *j)
+static int enter_chroot(const struct minijail *j, char *dev_path)
{
int ret;
- if (j->mounts_head && (ret = mount_one(j, j->mounts_head)))
+ if (j->mounts_head && (ret = mount_one(j, j->mounts_head, dev_path)))
return ret;
+
+ /*
+ * Once all bind mounts have been processed, but before we chroot,
+ * move the temp dev to its final /dev home.
+ *
+ * Die here on failure rather than returning: |ret| does not hold the
+ * finalize result (it is 0 after a successful mount_one, and unset
+ * when there are no mounts), and mount_dev_finalize has already
+ * released |dev_path|, which the caller would otherwise clean up a
+ * second time.
+ */
+ if (j->flags.mount_dev && mount_dev_finalize(j, dev_path))
+ pdie("mount_dev_finalize");
run_hooks_or_die(j, MINIJAIL_HOOK_EVENT_PRE_CHROOT);
@@ -1250,11 +1424,18 @@
return 0;
}
-static int enter_pivot_root(const struct minijail *j)
+static int enter_pivot_root(const struct minijail *j, char *dev_path)
{
int ret, oldroot, newroot;
- if (j->mounts_head && (ret = mount_one(j, j->mounts_head)))
+ if (j->mounts_head && (ret = mount_one(j, j->mounts_head, dev_path)))
return ret;
+
+ /*
+ * Once all bind mounts have been processed, but before we pivot,
+ * move the temp dev to its final /dev home.
+ *
+ * Die here on failure rather than returning: |ret| does not hold the
+ * finalize result (it is 0 after a successful mount_one, and unset
+ * when there are no mounts), and mount_dev_finalize has already
+ * released |dev_path|, which the caller would otherwise clean up a
+ * second time.
+ */
+ if (j->flags.mount_dev && mount_dev_finalize(j, dev_path))
+ pdie("mount_dev_finalize");
run_hooks_or_die(j, MINIJAIL_HOOK_EVENT_PRE_CHROOT);
@@ -1789,11 +1970,33 @@
pdie("keyctl(KEYCTL_JOIN_SESSION_KEYRING) failed");
}
- if (j->flags.chroot && enter_chroot(j))
- pdie("chroot");
+ /*
+ * This has to come before the chroot/pivot_root in case there are
+ * bind mounts from /dev into the chroot dev.
+ */
+ char *dev_path = NULL;
+ if (j->flags.mount_dev && mount_dev(&dev_path))
+ pdie("mount_dev");
- if (j->flags.pivot_root && enter_pivot_root(j))
+ if (j->flags.chroot && enter_chroot(j, dev_path)) {
+ if (dev_path)
+ mount_dev_cleanup(dev_path);
+ pdie("chroot");
+ }
+
+ if (j->flags.pivot_root && enter_pivot_root(j, dev_path)) {
+ if (dev_path)
+ mount_dev_cleanup(dev_path);
pdie("pivot_root");
+ }
+
+ /*
+ * If using a chroot or pivot root, we already finalized /dev at
+ * the right point. If not, we need to call it ourselves.
+ */
+ if (j->flags.mount_dev && !j->flags.chroot && !j->flags.pivot_root &&
+ mount_dev_finalize(j, dev_path))
+ pdie("mount_dev_finalize");
if (j->flags.mount_tmp && mount_tmp(j))
pdie("mount_tmp");