mm: avoid repeated anon_vma lock/unlock sequences in anon_vma_clone()

In anon_vma_clone() we traverse the vma->anon_vma_chain of the source
vma, locking the anon_vma for each entry.

But they are all going to have the same root entry, which means that
we're locking and unlocking the same lock over and over again.  That is
expensive in locked operations on its own, but it can get _really_
expensive when that root entry sees any kind of lock contention.
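
Schematically, each iteration of that loop used to do the equivalent of
the following (a simplified sketch; the real code goes through
anon_vma_chain_link() and anon_vma_lock()):

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		...
		/* anon_vma_lock() boils down to this: */
		mutex_lock(&pavc->anon_vma->root->mutex);
		list_add_tail(&avc->same_anon_vma, &pavc->anon_vma->head);
		mutex_unlock(&pavc->anon_vma->root->mutex);
	}

Since every pavc->anon_vma->root on the chain points to the same root
anon_vma, the same mutex gets bounced on every single iteration.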

In fact, Tim Chen reports a big performance regression due to this: when
we switched to using a mutex instead of a spinlock, the contention case
got much worse.

To alleviate all this, this commit creates a small helper function
(lock_anon_vma_root()) that can be used to take the lock just once for
the whole traversal rather than taking and releasing it over and over
again.
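
In sketch form, the clone loop then becomes:

	struct anon_vma *root = NULL;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		...
		/* only touches the mutex when the root actually changes */
		root = lock_anon_vma_root(root, pavc->anon_vma);
		...
	}
	unlock_anon_vma_root(root);	/* single unlock at the end */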

We still have the same "take the lock and release it" behavior in the
exit path (in unlink_anon_vmas()), but that one is a bit harder to fix,
since we're actually freeing the anon_vma entries as we go, and the
freeing itself needs to touch the lock.
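
Roughly, each unlink there needs to do something like this (a sketch,
not the verbatim kernel code):

	anon_vma_lock(anon_vma);	/* takes anon_vma->root->mutex */
	empty = list_empty(&anon_vma->head);
	anon_vma_unlock(anon_vma);

	if (empty)
		put_anon_vma(anon_vma);

and put_anon_vma() can end up freeing the anon_vma - possibly the root
itself, whose mutex is the very lock we would want to keep holding.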

Reported-and-tested-by: Tim Chen <tim.c.chen@linux.intel.com>
Tested-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/rmap.c b/mm/rmap.c
index 0eb463e..f286697 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -200,6 +200,32 @@
 	return -ENOMEM;
 }
 
+/*
+ * This is a useful helper function for locking the anon_vma root as
+ * we traverse the vma->anon_vma_chain, looping over anon_vma's that
+ * have the same vma.
+ *
+ * Such anon_vma's should have the same root, so you'd expect to see
+ * just a single mutex_lock for the whole traversal.
+ */
+static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
+{
+	struct anon_vma *new_root = anon_vma->root;
+	if (new_root != root) {
+		if (WARN_ON_ONCE(root))
+			mutex_unlock(&root->mutex);
+		root = new_root;
+		mutex_lock(&root->mutex);
+	}
+	return root;
+}
+
+static inline void unlock_anon_vma_root(struct anon_vma *root)
+{
+	if (root)
+		mutex_unlock(&root->mutex);
+}
+
 static void anon_vma_chain_link(struct vm_area_struct *vma,
 				struct anon_vma_chain *avc,
 				struct anon_vma *anon_vma)
@@ -208,13 +234,11 @@
 	avc->anon_vma = anon_vma;
 	list_add(&avc->same_vma, &vma->anon_vma_chain);
 
-	anon_vma_lock(anon_vma);
 	/*
 	 * It's critical to add new vmas to the tail of the anon_vma,
 	 * see comment in huge_memory.c:__split_huge_page().
 	 */
 	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
-	anon_vma_unlock(anon_vma);
 }
 
 /*
@@ -224,16 +248,23 @@
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
+	struct anon_vma *root = NULL;
 
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
+		struct anon_vma *anon_vma;
+
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
-		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+		anon_vma = pavc->anon_vma;
+		root = lock_anon_vma_root(root, anon_vma);
+		anon_vma_chain_link(dst, avc, anon_vma);
 	}
+	unlock_anon_vma_root(root);
 	return 0;
 
  enomem_failure:
+	unlock_anon_vma_root(root);
 	unlink_anon_vmas(dst);
 	return -ENOMEM;
 }
@@ -280,7 +311,9 @@
 	get_anon_vma(anon_vma->root);
 	/* Mark this anon_vma as the one where our new (COWed) pages go. */
 	vma->anon_vma = anon_vma;
+	anon_vma_lock(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
+	anon_vma_unlock(anon_vma);
 
 	return 0;