ksm: reorganize ksm_check_stable_tree

Memory hotremove's ksm_check_stable_tree() is pitifully inefficient
(it restarts the scan whenever it finds a stale node to remove).
Rearrange it so that at least it does not needlessly restart from nid 0
each time.  Also add a couple of comments explaining why stable_node
keeps a pfn instead of a struct page pointer.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/ksm.c b/mm/ksm.c
index e10dc24..70daa35 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1830,31 +1830,36 @@
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
-						 unsigned long end_pfn)
+static void ksm_check_stable_tree(unsigned long start_pfn,
+				  unsigned long end_pfn)
 {
+	struct stable_node *stable_node;
 	struct rb_node *node;
 	int nid;
 
-	for (nid = 0; nid < nr_node_ids; nid++)
-		for (node = rb_first(&root_stable_tree[nid]); node;
-				node = rb_next(node)) {
-			struct stable_node *stable_node;
-
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		node = rb_first(&root_stable_tree[nid]);
+		while (node) {
 			stable_node = rb_entry(node, struct stable_node, node);
 			if (stable_node->kpfn >= start_pfn &&
-			    stable_node->kpfn < end_pfn)
-				return stable_node;
+			    stable_node->kpfn < end_pfn) {
+				/*
+				 * Don't get_ksm_page, page has already gone:
+				 * which is why we keep kpfn instead of page*
+				 */
+				remove_node_from_stable_tree(stable_node);
+				node = rb_first(&root_stable_tree[nid]);
+			} else
+				node = rb_next(node);
+			cond_resched();
 		}
-
-	return NULL;
+	}
 }
 
 static int ksm_memory_callback(struct notifier_block *self,
 			       unsigned long action, void *arg)
 {
 	struct memory_notify *mn = arg;
-	struct stable_node *stable_node;
 
 	switch (action) {
 	case MEM_GOING_OFFLINE:
@@ -1874,11 +1879,12 @@
 		/*
 		 * Most of the work is done by page migration; but there might
 		 * be a few stable_nodes left over, still pointing to struct
-		 * pages which have been offlined: prune those from the tree.
+		 * pages which have been offlined: prune those from the tree,
+		 * otherwise get_ksm_page() might later try to access a
+		 * non-existent struct page.
 		 */
-		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
-					mn->start_pfn + mn->nr_pages)) != NULL)
-			remove_node_from_stable_tree(stable_node);
+		ksm_check_stable_tree(mn->start_pfn,
+				      mn->start_pfn + mn->nr_pages);
 		/* fallthrough */
 
 	case MEM_CANCEL_OFFLINE: