mm: test and set zone reclaim lock before starting reclaim

Introduces new zone flag interface for testing and setting flags:

	int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag)

Instead of setting and clearing ZONE_RECLAIM_LOCKED each time shrink_zone() is
called, this flag is test and set before starting zone reclaim.  Zone reclaim
starts in __alloc_pages() when a zone's watermark fails and the system is in
zone_reclaim_mode.  If it's already in reclaim, there's no need to start again
so it is simply considered full for that allocation attempt.

There is a change of behavior with regard to concurrent zone shrinking.  It is
now possible for try_to_free_pages() or kswapd to already be shrinking a
particular zone when __alloc_pages() starts zone reclaim.  In this case, it is
possible for two concurrent threads to invoke shrink_zone() for a single zone.

This change forbids a zone to be in zone reclaim twice, which was always the
behavior, but allows for concurrent try_to_free_pages() or kswapd shrinking
when starting zone reclaim.

Cc: Andrea Arcangeli <andrea@suse.de>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d8893dc..e147138 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1108,8 +1108,6 @@
 	unsigned long nr_to_scan;
 	unsigned long nr_reclaimed = 0;
 
-	zone_set_flag(zone, ZONE_RECLAIM_LOCKED);
-
 	/*
 	 * Add one to `nr_to_scan' just to make sure that the kernel will
 	 * slowly sift through the active list.
@@ -1148,8 +1146,6 @@
 	}
 
 	throttle_vm_writeout(sc->gfp_mask);
-
-	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
 	return nr_reclaimed;
 }
 
@@ -1900,6 +1896,7 @@
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
 	int node_id;
+	int ret;
 
 	/*
 	 * Zone reclaim reclaims unmapped file backed pages and
@@ -1917,13 +1914,13 @@
 			<= zone->min_slab_pages)
 		return 0;
 
+	if (zone_is_all_unreclaimable(zone))
+		return 0;
+
 	/*
-	 * Avoid concurrent zone reclaims, do not reclaim in a zone that does
-	 * not have reclaimable pages and if we should not delay the allocation
-	 * then do not scan.
+	 * Do not scan if the allocation should not be delayed.
 	 */
-	if (!(gfp_mask & __GFP_WAIT) || zone_is_all_unreclaimable(zone) ||
-		zone_is_reclaim_locked(zone) || (current->flags & PF_MEMALLOC))
+	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
 			return 0;
 
 	/*
@@ -1935,6 +1932,12 @@
 	node_id = zone_to_nid(zone);
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
 		return 0;
-	return __zone_reclaim(zone, gfp_mask, order);
+
+	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
+		return 0;
+	ret = __zone_reclaim(zone, gfp_mask, order);
+	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
+
+	return ret;
 }
 #endif