mm: avoid anon_vma_chain allocation under anon_vma lock

Hugh Dickins points out that lockdep (correctly) spots a potential
deadlock on the anon_vma lock, because we now do a GFP_KERNEL allocation
of anon_vma_chain while doing anon_vma_clone().  The problem is that
page reclaim will want to take the anon_vma lock of any anonymous pages
that it will try to reclaim.

So re-organize the code in anon_vma_clone() slightly: first do just a
GFP_NOWAIT allocation, which will usually work fine.  But if that fails,
let's just drop the lock and re-do the allocation, now with GFP_KERNEL.

End result: not only do we avoid the locking problem, this also ends up
getting better concurrency in case the allocation does need to block.
Tim Chen reports that with all these anon_vma locking tweaks, we're now
almost back up to the spinlock performance.

Reported-and-tested-by: Hugh Dickins <hughd@google.com>
Tested-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/rmap.c b/mm/rmap.c
index 68756a7..27dfd3b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -112,9 +112,9 @@
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
-static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
 {
-	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
 }
 
 static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
@@ -159,7 +159,7 @@
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
-		avc = anon_vma_chain_alloc();
+		avc = anon_vma_chain_alloc(GFP_KERNEL);
 		if (!avc)
 			goto out_enomem;
 
@@ -253,9 +253,14 @@
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma;
 
-		avc = anon_vma_chain_alloc();
-		if (!avc)
-			goto enomem_failure;
+		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
+		if (unlikely(!avc)) {
+			unlock_anon_vma_root(root);
+			root = NULL;
+			avc = anon_vma_chain_alloc(GFP_KERNEL);
+			if (!avc)
+				goto enomem_failure;
+		}
 		anon_vma = pavc->anon_vma;
 		root = lock_anon_vma_root(root, anon_vma);
 		anon_vma_chain_link(dst, avc, anon_vma);
@@ -264,7 +269,6 @@
 	return 0;
 
  enomem_failure:
-	unlock_anon_vma_root(root);
 	unlink_anon_vmas(dst);
 	return -ENOMEM;
 }
@@ -294,7 +298,7 @@
 	anon_vma = anon_vma_alloc();
 	if (!anon_vma)
 		goto out_error;
-	avc = anon_vma_chain_alloc();
+	avc = anon_vma_chain_alloc(GFP_KERNEL);
 	if (!avc)
 		goto out_error_free_anon_vma;