userns: Add per user namespace sysctls.

Limit per userns sysctls to only be opened for write by a holder
of CAP_SYS_RESOURCE.

Add all of the necessary boilerplate for having per user namespace
sysctls.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4e79b3c..e5697ea 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -40,6 +40,10 @@
 	struct rw_semaphore	persistent_keyring_register_sem;
 #endif
 	struct work_struct	work;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_set	set;
+	struct ctl_table_header *sysctls;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e..eb26e12c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@
 	    extable.o params.o \
 	    kthread.o sys_ni.o nsproxy.o \
 	    notifier.o ksysfs.o cred.o reboot.o \
-	    async.o range.o smpboot.o
+	    async.o range.o smpboot.o ucount.o
 
 obj-$(CONFIG_MULTIUSER) += groups.o
 
diff --git a/kernel/ucount.c b/kernel/ucount.c
new file mode 100644
index 0000000..cbde1dc8
--- /dev/null
+++ b/kernel/ucount.c
@@ -0,0 +1,99 @@
+/*
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ */
+
+#include <linux/stat.h>
+#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_set *
+set_lookup(struct ctl_table_root *root)
+{
+	return &current_user_ns()->set;
+}
+
+static int set_is_seen(struct ctl_table_set *set)
+{
+	return &current_user_ns()->set == set;
+}
+
+static int set_permissions(struct ctl_table_header *head,
+				  struct ctl_table *table)
+{
+	struct user_namespace *user_ns =
+		container_of(head->set, struct user_namespace, set);
+	int mode;
+
+	/* Allow users with CAP_SYS_RESOURCE unrestrained access */
+	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+		mode = (table->mode & S_IRWXU) >> 6;
+	else
+	/* Allow all others at most read-only access */
+		mode = table->mode & S_IROTH;
+	return (mode << 6) | (mode << 3) | mode;
+}
+
+static struct ctl_table_root set_root = {
+	.lookup = set_lookup,
+	.permissions = set_permissions,
+};
+
+static struct ctl_table userns_table[] = {
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+bool setup_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl;
+	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
+	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
+	if (tbl) {
+		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
+	}
+	if (!ns->sysctls) {
+		kfree(tbl);
+		retire_sysctl_set(&ns->set);
+		return false;
+	}
+#endif
+	return true;
+}
+
+void retire_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl;
+
+	tbl = ns->sysctls->ctl_table_arg;
+	unregister_sysctl_table(ns->sysctls);
+	retire_sysctl_set(&ns->set);
+	kfree(tbl);
+#endif
+}
+
+static __init int user_namespace_sysctl_init(void)
+{
+#ifdef CONFIG_SYSCTL
+	static struct ctl_table_header *userns_header;
+	static struct ctl_table empty[1];
+	/*
+	 * It is necessary to register the userns directory in the
+	 * default set so that registrations in the child sets work
+	 * properly.
+	 */
+	userns_header = register_sysctl("userns", empty);
+	BUG_ON(!userns_header);
+	BUG_ON(!setup_userns_sysctls(&init_user_ns));
+#endif
+	return 0;
+}
+subsys_initcall(user_namespace_sysctl_init);
+
+
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 5247cdb..a633322 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -23,6 +23,9 @@
 #include <linux/projid.h>
 #include <linux/fs_struct.h>
 
+extern bool setup_userns_sysctls(struct user_namespace *ns);
+extern void retire_userns_sysctls(struct user_namespace *ns);
+
 static struct kmem_cache *user_ns_cachep __read_mostly;
 static DEFINE_MUTEX(userns_state_mutex);
 
@@ -109,12 +112,22 @@
 	ns->flags = parent_ns->flags;
 	mutex_unlock(&userns_state_mutex);
 
-	set_cred_user_ns(new, ns);
-
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	init_rwsem(&ns->persistent_keyring_register_sem);
 #endif
+	ret = -ENOMEM;
+	if (!setup_userns_sysctls(ns))
+		goto fail_keyring;
+
+	set_cred_user_ns(new, ns);
 	return 0;
+fail_keyring:
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	key_put(ns->persistent_keyring_register);
+#endif
+	ns_free_inum(&ns->ns);
+	kmem_cache_free(user_ns_cachep, ns);
+	return ret;
 }
 
 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
@@ -144,6 +157,7 @@
 
 	do {
 		parent = ns->parent;
+		retire_userns_sysctls(ns);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 		key_put(ns->persistent_keyring_register);
 #endif