mm: have zonelist contains structs with both a zone pointer and zone_idx

Filtering zonelists requires very frequent use of zone_idx().  This is costly
as it involves a lookup of another structure and a substraction operation.  As
the zone_idx is often required, it should be quickly accessible.  The node idx
could also be stored here if it was found that accessing zone->node is
significant which may be the case on workloads where nodemasks are heavily
used.

This patch introduces a struct zoneref to store a zone pointer and a zone
index.  The zonelist then consists of an array of these struct zonerefs which
are looked up as necessary.  Helpers are given for accessing the zone index as
well as the node index.

[kamezawa.hiroyu@jp.fujitsu.com: Suggested struct zoneref instead of embedding information in pointers]
[hugh@veritas.com: mm-have-zonelist: fix memcg ooms]
[hugh@veritas.com: just return do_try_to_free_pages]
[hugh@veritas.com: do_try_to_free_pages gfp_mask redundant]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5d20bf4..90193a2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -186,7 +186,7 @@
 		for_each_node_mask(nd, *nodes) { 
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
 			if (z->present_pages > 0) 
-				zl->zones[num++] = z;
+				zoneref_set_zone(z, &zl->_zonerefs[num++]);
 		}
 		if (k == 0)
 			break;
@@ -196,7 +196,8 @@
 		kfree(zl);
 		return ERR_PTR(-EINVAL);
 	}
-	zl->zones[num] = NULL;
+	zl->_zonerefs[num].zone = NULL;
+	zl->_zonerefs[num].zone_idx = 0;
 	return zl;
 }
 
@@ -504,9 +505,11 @@
 	nodes_clear(*nodes);
 	switch (p->policy) {
 	case MPOL_BIND:
-		for (i = 0; p->v.zonelist->zones[i]; i++)
-			node_set(zone_to_nid(p->v.zonelist->zones[i]),
-				*nodes);
+		for (i = 0; p->v.zonelist->_zonerefs[i].zone; i++) {
+			struct zoneref *zref;
+			zref = &p->v.zonelist->_zonerefs[i];
+			node_set(zonelist_node_idx(zref), *nodes);
+		}
 		break;
 	case MPOL_DEFAULT:
 		break;
@@ -1212,12 +1215,13 @@
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
 
-	case MPOL_BIND:
+	case MPOL_BIND: {
 		/*
 		 * Follow bind policy behavior and start allocation at the
 		 * first node.
 		 */
-		return zone_to_nid(policy->v.zonelist->zones[0]);
+		return zonelist_node_idx(policy->v.zonelist->_zonerefs);
+	}
 
 	case MPOL_PREFERRED:
 		if (policy->v.preferred_node >= 0)
@@ -1323,7 +1327,7 @@
 
 	zl = node_zonelist(nid, gfp);
 	page = __alloc_pages(gfp, order, zl);
-	if (page && page_zone(page) == zl->zones[0])
+	if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
 		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
 	return page;
 }
@@ -1463,10 +1467,14 @@
 		return a->v.preferred_node == b->v.preferred_node;
 	case MPOL_BIND: {
 		int i;
-		for (i = 0; a->v.zonelist->zones[i]; i++)
-			if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
+		for (i = 0; a->v.zonelist->_zonerefs[i].zone; i++) {
+			struct zone *za, *zb;
+			za = zonelist_zone(&a->v.zonelist->_zonerefs[i]);
+			zb = zonelist_zone(&b->v.zonelist->_zonerefs[i]);
+			if (za != zb)
 				return 0;
-		return b->v.zonelist->zones[i] == NULL;
+		}
+		return b->v.zonelist->_zonerefs[i].zone == NULL;
 	}
 	default:
 		BUG();
@@ -1785,12 +1793,12 @@
 		break;
 	case MPOL_BIND: {
 		nodemask_t nodes;
-		struct zone **z;
+		struct zoneref *z;
 		struct zonelist *zonelist;
 
 		nodes_clear(nodes);
-		for (z = pol->v.zonelist->zones; *z; z++)
-			node_set(zone_to_nid(*z), nodes);
+		for (z = pol->v.zonelist->_zonerefs; z->zone; z++)
+			node_set(zonelist_node_idx(z), nodes);
 		nodes_remap(tmp, nodes, *mpolmask, *newmask);
 		nodes = tmp;