[PATCH] cpuset memory spread slab cache optimizations

The hooks in the slab cache allocator code path for support of NUMA
mempolicies and cpuset memory spreading are in an important code path.  Many
systems will use neither feature.

This patch optimizes those hooks down to a single check of some bits in the
current tasks task_struct flags.  For non NUMA systems, this hook and related
code is already ifdef'd out.

The optimization is done by using another task flag, set if the task is using
a non-default NUMA mempolicy.  Taking this flag bit along with the
PF_SPREAD_PAGE and PF_SPREAD_SLAB flag bits added earlier in this 'cpuset
memory spreading' patch set, one can check for the combination of any of these
special case memory placement mechanisms with a single test of the current
tasks task_struct flags.

This patch also tightens up the code, to save a few bytes of kernel text
space, and moves some of it out of line.  Due to the nested inlines called
from multiple places, we were ending up with three copies of this code, which
once we get off the main code path (for local node allocation) seems a bit
wasteful of instruction memory.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e93cc74..4f71cfd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -422,6 +422,37 @@
 	return mpol_check_policy(mode, nodes);
 }
 
+
+/*
+ * Update task->flags PF_MEMPOLICY bit: set iff non-default
+ * mempolicy.  Allows more rapid checking of this (combined perhaps
+ * with other PF_* flag bits) on memory allocation hot code paths.
+ *
+ * If called from outside this file, the task 'p' should -only- be
+ * a newly forked child not yet visible on the task list, because
+ * manipulating the task flags of a visible task is not safe.
+ *
+ * The above limitation is why this routine has the funny name
+ * mpol_fix_fork_child_flag().
+ *
+ * It is also safe to call this with a task pointer of current,
+ * which the static wrapper mpol_set_task_struct_flag() does,
+ * for use within this file.
+ */
+
+void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+	if (p->mempolicy)
+		p->flags |= PF_MEMPOLICY;
+	else
+		p->flags &= ~PF_MEMPOLICY;
+}
+
+static void mpol_set_task_struct_flag(void)
+{
+	mpol_fix_fork_child_flag(current);
+}
+
 /* Set the process memory policy */
 long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
@@ -434,6 +465,7 @@
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
+	mpol_set_task_struct_flag();
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = first_node(new->v.nodes);
 	return 0;
diff --git a/mm/slab.c b/mm/slab.c
index de51665..f80b523 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -899,6 +899,7 @@
 
 #ifdef CONFIG_NUMA
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
 static struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -2808,19 +2809,11 @@
 	struct array_cache *ac;
 
 #ifdef CONFIG_NUMA
-	if (unlikely(current->mempolicy && !in_interrupt())) {
-		int nid = slab_node(current->mempolicy);
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
-	}
-	if (unlikely(cpuset_do_slab_mem_spread() &&
-					(cachep->flags & SLAB_MEM_SPREAD) &&
-					!in_interrupt())) {
-		int nid = cpuset_mem_spread_node();
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
+	if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
+							PF_MEMPOLICY))) {
+		objp = alternate_node_alloc(cachep, flags);
+		if (objp != NULL)
+			return objp;
 	}
 #endif
 
@@ -2856,6 +2849,28 @@
 
 #ifdef CONFIG_NUMA
 /*
+ * Try allocating on another node if PF_SPREAD_PAGE|PF_SPREAD_SLAB|PF_MEMPOLICY.
+ *
+ * If we are in_interrupt, then process context, including cpusets and
+ * mempolicy, may not apply and should not be used for allocation policy.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	int nid_alloc, nid_here;
+
+	if (in_interrupt())
+		return NULL;
+	nid_alloc = nid_here = numa_node_id();
+	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
+		nid_alloc = cpuset_mem_spread_node();
+	else if (current->mempolicy)
+		nid_alloc = slab_node(current->mempolicy);
+	if (nid_alloc != nid_here)
+		return __cache_alloc_node(cachep, flags, nid_alloc);
+	return NULL;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,