[PATCH] IPC namespace core

This patch set allows a task to unshare its IPCs and have a private set of IPC
objects (sem, shm, msg) inside a namespace.  Basically, it is another building
block of container functionality.

This patch implements core IPC namespace changes:
- ipc_namespace structure
- new config option CONFIG_IPC_NS
- new CLONE_NEWIPC flag
- unshare support

[clg@fr.ibm.com: small fix for unshare of ipc namespace]
[akpm@osdl.org: build fix]
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e08531e..ceecf69 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -74,6 +74,7 @@
 	.count		= ATOMIC_INIT(1),				\
 	.nslock		= SPIN_LOCK_UNLOCKED,				\
 	.uts_ns		= &init_uts_ns,					\
+	.ipc_ns		= &init_ipc_ns,					\
 	.namespace	= NULL,						\
 }
 
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index b291189..36027b1 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -2,6 +2,7 @@
 #define _LINUX_IPC_H
 
 #include <linux/types.h>
+#include <linux/kref.h>
 
 #define IPC_PRIVATE ((__kernel_key_t) 0)  
 
@@ -68,6 +69,41 @@
 	void		*security;
 };
 
+struct ipc_ids;
+struct ipc_namespace {		/* per-namespace IPC state, refcounted */
+	struct kref	kref;	/* taken by get_ipc_ns(), dropped by put_ipc_ns() */
+	struct ipc_ids	*ids[3]; /* NOTE(review): presumably one per IPC type (sem, shm, msg) -- confirm index order */
+
+	int		sem_ctls[4]; /* NOTE(review): presumably the four semaphore limits -- confirm order */
+	int		used_sems; /* NOTE(review): presumably semaphores currently in use -- confirm */
+
+	int		msg_ctlmax; /* NOTE(review): msg_* look like message queue limits -- confirm */
+	int		msg_ctlmnb;
+	int		msg_ctlmni;
+
+	size_t		shm_ctlmax; /* NOTE(review): shm_ctl* look like shared memory limits -- confirm */
+	size_t		shm_ctlall;
+	int		shm_ctlmni;
+	int		shm_tot; /* NOTE(review): presumably total shm in use -- confirm units */
+};
+
+extern struct ipc_namespace init_ipc_ns;
+extern void free_ipc_ns(struct kref *kref);
+extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
+extern int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns);
+
+static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+{	/* Take a reference on ns (if any) and hand the same pointer back. */
+	if (ns)				/* a NULL ns is passed through untouched */
+		kref_get(&ns->kref);
+	return ns;
+}
+
+static inline void put_ipc_ns(struct ipc_namespace *ns)
+{	/* Drop one reference; ns must be non-NULL (no check here, unlike get_ipc_ns()). */
+	kref_put(&ns->kref, free_ipc_ns);	/* free_ipc_ns is the release callback */
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_IPC_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 9c2e0ad..f6baecd 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -6,6 +6,7 @@
 
 struct namespace;
 struct uts_namespace;
+struct ipc_namespace;
 
 /*
  * A structure to contain pointers to all per-process
@@ -23,6 +24,7 @@
 	atomic_t count;
 	spinlock_t nslock;
 	struct uts_namespace *uts_ns;
+	struct ipc_namespace *ipc_ns;
 	struct namespace *namespace;
 };
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a973e70..9ba959e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
 #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
+#define CLONE_NEWIPC		0x08000000	/* New ipcs */
 
 /*
  * Scheduling policies
diff --git a/init/Kconfig b/init/Kconfig
index b0ea975..1038293 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -115,6 +115,15 @@
 	  section 6.4 of the Linux Programmer's Guide, available from
 	  <http://www.tldp.org/guides.html>.
 
+config IPC_NS
+	bool "IPC Namespaces"
+	depends on SYSVIPC
+	default n
+	help
+	  Support IPC namespaces.  This allows containers, i.e. virtual
+	  environments, to each have a private set of System V IPC objects
+	  (semaphores, shared memory, message queues).  If unsure, say N.
+
 config POSIX_MQUEUE
 	bool "POSIX Message Queues"
 	depends on NET && EXPERIMENTAL
diff --git a/kernel/fork.c b/kernel/fork.c
index 208dd99..d6cc565 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1608,13 +1608,15 @@
 	struct sem_undo_list *new_ulist = NULL;
 	struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
 	struct uts_namespace *uts, *new_uts = NULL;
+	struct ipc_namespace *ipc, *new_ipc = NULL;
 
 	check_unshare_flags(&unshare_flags);
 
 	/* Return -EINVAL for all unsupported flags */
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|CLONE_NEWUTS))
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC))
 		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
@@ -1633,18 +1635,20 @@
 		goto bad_unshare_cleanup_fd;
 	if ((err = unshare_utsname(unshare_flags, &new_uts)))
 		goto bad_unshare_cleanup_semundo;
+	if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
+		goto bad_unshare_cleanup_uts;
 
-	if (new_ns || new_uts) {
+	if (new_ns || new_uts || new_ipc) {
 		old_nsproxy = current->nsproxy;
 		new_nsproxy = dup_namespaces(old_nsproxy);
 		if (!new_nsproxy) {
 			err = -ENOMEM;
-			goto bad_unshare_cleanup_uts;
+			goto bad_unshare_cleanup_ipc;
 		}
 	}
 
 	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
-				new_uts) {
+				new_uts || new_ipc) {
 
 		task_lock(current);
 
@@ -1692,12 +1696,22 @@
 			new_uts = uts;
 		}
 
+		if (new_ipc) {
+			ipc = current->nsproxy->ipc_ns;
+			current->nsproxy->ipc_ns = new_ipc;
+			new_ipc = ipc;
+		}
+
 		task_unlock(current);
 	}
 
 	if (new_nsproxy)
 		put_nsproxy(new_nsproxy);
 
+bad_unshare_cleanup_ipc:
+	if (new_ipc)
+		put_ipc_ns(new_ipc);
+
 bad_unshare_cleanup_uts:
 	if (new_uts)
 		put_uts_ns(new_uts);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 8246813..8d6c852 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -7,6 +7,10 @@
  *  modify it under the terms of the GNU General Public License as
  *  published by the Free Software Foundation, version 2 of the
  *  License.
+ *
+ *  Jun 2006 - namespaces support
+ *             OpenVZ, SWsoft Inc.
+ *             Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/module.h>
@@ -62,6 +66,8 @@
 			get_namespace(ns->namespace);
 		if (ns->uts_ns)
 			get_uts_ns(ns->uts_ns);
+		if (ns->ipc_ns)
+			get_ipc_ns(ns->ipc_ns);
 	}
 
 	return ns;
@@ -82,7 +88,7 @@
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
 		return 0;
 
 	new_ns = clone_namespaces(old_ns);
@@ -94,24 +100,31 @@
 	tsk->nsproxy = new_ns;
 
 	err = copy_namespace(flags, tsk);
-	if (err) {
-		tsk->nsproxy = old_ns;
-		put_nsproxy(new_ns);
-		goto out;
-	}
+	if (err)
+		goto out_ns;
 
 	err = copy_utsname(flags, tsk);
-	if (err) {
-		if (new_ns->namespace)
-			put_namespace(new_ns->namespace);
-		tsk->nsproxy = old_ns;
-		put_nsproxy(new_ns);
-		goto out;
-	}
+	if (err)
+		goto out_uts;
+
+	err = copy_ipcs(flags, tsk);
+	if (err)
+		goto out_ipc;
 
 out:
 	put_nsproxy(old_ns);
 	return err;
+
+out_ipc:
+	if (new_ns->uts_ns)
+		put_uts_ns(new_ns->uts_ns);
+out_uts:
+	if (new_ns->namespace)
+		put_namespace(new_ns->namespace);
+out_ns:
+	tsk->nsproxy = old_ns;
+	put_nsproxy(new_ns);
+	goto out;
 }
 
 void free_nsproxy(struct nsproxy *ns)
@@ -120,5 +133,7 @@
 			put_namespace(ns->namespace);
 		if (ns->uts_ns)
 			put_uts_ns(ns->uts_ns);
+		if (ns->ipc_ns)
+			put_ipc_ns(ns->ipc_ns);
 		kfree(ns);
 }