Make Minijail work correctly with shared mounts.

This fixes some problems that appear when the system is booted with
systemd. Systemd sets all mounts to shared. This means that when
minijail0 creates a mount namespace, new mounts will propagate out of
that namespace.

This change fixes that by setting all mounts to private right after
creating the new mount namespace.
Also, when remounting /proc, it now unmounts it lazily (MNT_DETACH), as a
normal umount() may fail when shared mounts are enabled.

More information about shared mounts:
https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt

(Original patch by Andrey Ulanov <andreyu@google.com> at
https://chromium-review.googlesource.com/303158)

Change-Id: I0ff5851dba32524bd6c4ad663b67826fb9be0485
diff --git a/libminijail.c b/libminijail.c
index 5ccb9c1..7eba3ac 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -821,7 +821,7 @@
 	 * and make our own. However, if we are in a new user namespace, /proc
 	 * is not seen as mounted, so don't return error if umount() fails.
 	 */
-	if (umount(kProcPath) && !j->flags.userns)
+	if (umount2(kProcPath, MNT_DETACH) && !j->flags.userns)
 		return -errno;
 	if (mount("", kProcPath, "proc", kSafeFlags | MS_RDONLY, ""))
 		return -errno;
@@ -992,8 +992,17 @@
 	if (j->flags.enter_vfs && setns(j->mountns_fd, CLONE_NEWNS))
 		pdie("setns(CLONE_NEWNS)");
 
-	if (j->flags.vfs && unshare(CLONE_NEWNS))
-		pdie("unshare(vfs)");
+	if (j->flags.vfs) {
+          if (unshare(CLONE_NEWNS))
+            pdie("unshare(vfs)");
+          /*
+           * Remount all filesystems as private. If they are shared
+           * new bind mounts will creep out of our namespace.
+           * https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
+           */
+          if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
+            pdie("mount(/, private)");
+        }
 
 	if (j->flags.net && unshare(CLONE_NEWNET))
 		pdie("unshare(net)");