memcg: simple migration handling Now, management of "charge" under page migration is done under following manner. (Assume migrate page contents from oldpage to newpage) before - "newpage" is charged before migration. at success. - "oldpage" is uncharged at somewhere(unmap, radix-tree-replace) at failure - "newpage" is uncharged. - "oldpage" is charged if necessary (*1) But (*1) is not reliable....because of GFP_ATOMIC. This patch tries to change behavior as following by charge/commit/cancel ops. before - charge PAGE_SIZE (no target page) success - commit charge against "newpage". failure - commit charge against "oldpage". (PCG_USED bit works effectively to avoid double-counting) - if "oldpage" is obsolete, cancel charge of PAGE_SIZE. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@in.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: 01b1ae63c2270cbacfd43fea94578c17950eb548 [log] [tgz]
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Wed Jan 07 18:07:50 2009 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Thu Jan 08 08:31:04 2009 -0800
tree: ab0275f32e8548c4413014d43cab1f52f03c9c5c
parent: bced0520fe462bb94021dcabd32e99630c171be2 [diff] [blame]
diff --git a/mm/migrate.c b/mm/migrate.c
index 246dcb9..a30ea5f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -121,20 +121,6 @@
 	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
 		goto out;
 
-	/*
-	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-	 * Failure is not an option here: we're now expected to remove every
-	 * migration pte, and will cause crashes otherwise.  Normally this
-	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
-	 * page_cgroup count for safety, that's now attached to the new page,
-	 * so this charge should just be another incrementation of the count,
-	 * to keep in balance with rmap.c's mem_cgroup_uncharging.  But if
-	 * there's been a force_empty, those reference counts may no longer
-	 * be reliable, and this charge can actually fail: oh well, we don't
-	 * make the situation any worse by proceeding as if it had succeeded.
-	 */
-	mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
-
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
@@ -378,9 +364,6 @@
 	anon = PageAnon(page);
 	page->mapping = NULL;
 
-	if (!anon) /* This page was removed from radix-tree. */
-		mem_cgroup_uncharge_cache_page(page);
-
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
@@ -614,6 +597,7 @@
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
+	struct mem_cgroup *mem;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -623,24 +607,26 @@
 		goto move_newpage;
 	}
 
-	charge = mem_cgroup_prepare_migration(page, newpage);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto move_newpage;
-	}
 	/* prepare cgroup just returns 0 or -ENOMEM */
-	BUG_ON(charge);
-
 	rc = -EAGAIN;
+
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
 	}
 
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, &mem);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
 	if (PageWriteback(page)) {
 		if (!force)
-			goto unlock;
+			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -693,7 +679,9 @@
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage);
 unlock:
 	unlock_page(page);
 
@@ -709,8 +697,6 @@
 	}
 
 move_newpage:
-	if (!charge)
-		mem_cgroup_end_migration(newpage);
 
 	/*
 	 * Move the new page to the LRU. If migration was not successful
commit	01b1ae63c2270cbacfd43fea94578c17950eb548	[log] [tgz]
author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	Wed Jan 07 18:07:50 2009 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Thu Jan 08 08:31:04 2009 -0800
tree	ab0275f32e8548c4413014d43cab1f52f03c9c5c
parent	bced0520fe462bb94021dcabd32e99630c171be2 [diff] [blame]