ksm: rmap_walk to remove_migation_ptes A side-effect of making ksm pages swappable is that they have to be placed on the LRUs: which then exposes them to isolate_lru_page() and hence to page migration. Add rmap_walk() for remove_migration_ptes() to use: rmap_walk_anon() and rmap_walk_file() in rmap.c, but rmap_walk_ksm() in ksm.c. Perhaps some consolidation with existing code is possible, but don't attempt that yet (try_to_unmap needs to handle nonlinears, but migration pte removal does not). rmap_walk() is sadly less general than it appears: rmap_walk_anon(), like remove_anon_migration_ptes() which it replaces, avoids calling page_lock_anon_vma(), because that includes a page_mapped() test which fails when all migration ptes are in place. That was valid when NUMA page migration was introduced (holding mmap_sem provided the missing guarantee that anon_vma's slab had not already been destroyed), but I believe not valid in the memory hotremove case added since. For now do the same as before, and consider the best way to fix that unlikely race later on. When fixed, we can probably use rmap_walk() on hwpoisoned ksm pages too: for now, they remain among hwpoison's various exceptions (its PageKsm test comes before the page is locked, but its page_lock_anon_vma fails safely if an anon gets upgraded). Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: Izik Eidus <ieidus@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Chris Wright <chrisw@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: e9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5 [log] [tgz]
author: Hugh Dickins <hugh.dickins@tiscali.co.uk> Mon Dec 14 17:59:31 2009 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Tue Dec 15 08:53:20 2009 -0800
tree: df4324273856e06b8277b7e4a0fa9289eb8e6385
parent: 407f9c8b0889ced1dbe2f9157e4e60c61329d5c9 [diff] [blame]
diff --git a/mm/ksm.c b/mm/ksm.c
index 20f46a7..dfdc292 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c

@@ -1656,6 +1656,71 @@
 	return ret;
 }
 
+#ifdef CONFIG_MIGRATION
+int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
+		  struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct stable_node *stable_node;
+	struct hlist_node *hlist;
+	struct rmap_item *rmap_item;
+	int ret = SWAP_AGAIN;
+	int search_new_forks = 0;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return ret;
+again:
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct vm_area_struct *vma;
+
+		spin_lock(&anon_vma->lock);
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+			if (rmap_item->address < vma->vm_start ||
+			    rmap_item->address >= vma->vm_end)
+				continue;
+			/*
+			 * Initially we examine only the vma which covers this
+			 * rmap_item; but later, if there is still work to do,
+			 * we examine covering vmas in other mms: in case they
+			 * were forked from the original since ksmd passed.
+			 */
+			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+				continue;
+
+			ret = rmap_one(page, vma, rmap_item->address, arg);
+			if (ret != SWAP_AGAIN) {
+				spin_unlock(&anon_vma->lock);
+				goto out;
+			}
+		}
+		spin_unlock(&anon_vma->lock);
+	}
+	if (!search_new_forks++)
+		goto again;
+out:
+	return ret;
+}
+
+void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+{
+	struct stable_node *stable_node;
+
+	VM_BUG_ON(!PageLocked(oldpage));
+	VM_BUG_ON(!PageLocked(newpage));
+	VM_BUG_ON(newpage->mapping != oldpage->mapping);
+
+	stable_node = page_stable_node(newpage);
+	if (stable_node) {
+		VM_BUG_ON(stable_node->page != oldpage);
+		stable_node->page = newpage;
+	}
+}
+#endif /* CONFIG_MIGRATION */
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
commit	e9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5	[log] [tgz]
author	Hugh Dickins <hugh.dickins@tiscali.co.uk>	Mon Dec 14 17:59:31 2009 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue Dec 15 08:53:20 2009 -0800
tree	df4324273856e06b8277b7e4a0fa9289eb8e6385
parent	407f9c8b0889ced1dbe2f9157e4e60c61329d5c9 [diff] [blame]