Blame - mm/migrate.c - kernel/msm-4.19

blob: c38778610aa8cd32e377b588f2178e68b1f17d89 [file] [log] [blame]

Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1	/*
				2	* Memory Migration functionality - linux/mm/migrate.c
				3	*
				4	* Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
				5	*
				6	* Page migration was first developed in the context of the memory hotplug
				7	* project. The main authors of the migration code are:
				8	*
				9	* IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
				10	* Hirokazu Takahashi <taka@valinux.co.jp>
				11	* Dave Hansen <haveblue@us.ibm.com>
Christoph Lameter	cde5353	2008-07-04 09:59:22 -0700	[diff] [blame]	12	* Christoph Lameter
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	13	*/
				14
				15	#include <linux/migrate.h>
Paul Gortmaker	b95f1b31	2011-10-16 02:01:52 -0400	[diff] [blame]	16	#include <linux/export.h>
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	17	#include <linux/swap.h>
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	18	#include <linux/swapops.h>
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	19	#include <linux/pagemap.h>
Christoph Lameter	e23ca00	2006-04-10 22:52:57 -0700	[diff] [blame]	20	#include <linux/buffer_head.h>
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	21	#include <linux/mm_inline.h>
Pavel Emelyanov	b488893	2007-10-18 23:40:14 -0700	[diff] [blame]	22	#include <linux/nsproxy.h>
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	23	#include <linux/pagevec.h>
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	24	#include <linux/ksm.h>
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	25	#include <linux/rmap.h>
				26	#include <linux/topology.h>
				27	#include <linux/cpu.h>
				28	#include <linux/cpuset.h>
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	29	#include <linux/writeback.h>
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	30	#include <linux/mempolicy.h>
				31	#include <linux/vmalloc.h>
David Quigley	86c3a76	2006-06-23 02:04:02 -0700	[diff] [blame]	32	#include <linux/security.h>
Balbir Singh	8a9f3cc	2008-02-07 00:13:53 -0800	[diff] [blame]	33	#include <linux/memcontrol.h>
Adrian Bunk	4f5ca26	2008-07-23 21:27:02 -0700	[diff] [blame]	34	#include <linux/syscalls.h>
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	35	#include <linux/hugetlb.h>
Aneesh Kumar K.V	8e6ac7f	2012-07-31 16:42:27 -0700	[diff] [blame]	36	#include <linux/hugetlb_cgroup.h>
Tejun Heo	5a0e3ad	2010-03-24 17:04:11 +0900	[diff] [blame]	37	#include <linux/gfp.h>
Rafael Aquini	bf6bddf	2012-12-11 16:02:42 -0800	[diff] [blame]	38	#include <linux/balloon_compaction.h>
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	39
Michal Nazarewicz	0d1836c	2010-12-21 17:24:26 -0800	[diff] [blame]	40	#include <asm/tlbflush.h>
				41
Mel Gorman	7b2a2d4	2012-10-19 14:07:31 +0100	[diff] [blame]	42	#define CREATE_TRACE_POINTS
				43	#include <trace/events/migrate.h>
				44
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	45	#include "internal.h"
				46
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	47	/*
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	48	* migrate_prep() needs to be called before we start compiling a list of pages
Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	49	* to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
				50	* undesirable, use migrate_prep_local()
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	51	*/
				52	int migrate_prep(void)
				53	{
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	54	/*
				55	* Clear the LRU lists so pages can be isolated.
				56	* Note that pages may be moved off the LRU after we have
				57	* drained them. Those pages will fail to migrate like other
				58	* pages that may be busy.
				59	*/
				60	lru_add_drain_all();
				61
				62	return 0;
				63	}
				64
Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	65	/* Do the necessary work of migrate_prep but not if it involves other CPUs */
				66	int migrate_prep_local(void)
				67	{
				68	lru_add_drain();
				69
				70	return 0;
				71	}
				72
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	73	/*
Lee Schermerhorn	894bc31	2008-10-18 20:26:39 -0700	[diff] [blame]	74	* Add isolated pages on the list back to the LRU under page lock
				75	* to avoid leaking evictable pages back onto unevictable list.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	76	*/
Minchan Kim	e13861d	2010-05-24 14:31:59 -0700	[diff] [blame]	77	void putback_lru_pages(struct list_head *l)
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	78	{
				79	struct page *page;
				80	struct page *page2;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	81
				82	list_for_each_entry_safe(page, page2, l, lru) {
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	83	list_del(&page->lru);
KOSAKI Motohiro	a731286	2009-09-21 17:01:37 -0700	[diff] [blame]	84	dec_zone_page_state(page, NR_ISOLATED_ANON +
Johannes Weiner	6c0b135	2009-09-21 17:02:59 -0700	[diff] [blame]	85	page_is_file_cache(page));
Rafael Aquini	5733c7d	2012-12-11 16:02:47 -0800	[diff] [blame]	86	putback_lru_page(page);
				87	}
				88	}
				89
				90	/*
				91	* Put previously isolated pages back onto the appropriate lists
				92	* from where they were once taken off for compaction/migration.
				93	*
				94	* This function shall be used instead of putback_lru_pages(),
				95	* whenever the isolated pageset has been built by isolate_migratepages_range()
				96	*/
				97	void putback_movable_pages(struct list_head *l)
				98	{
				99	struct page *page;
				100	struct page *page2;
				101
				102	list_for_each_entry_safe(page, page2, l, lru) {
				103	list_del(&page->lru);
				104	dec_zone_page_state(page, NR_ISOLATED_ANON +
				105	page_is_file_cache(page));
Rafael Aquini	bf6bddf	2012-12-11 16:02:42 -0800	[diff] [blame]	106	if (unlikely(balloon_page_movable(page)))
				107	balloon_page_putback(page);
				108	else
				109	putback_lru_page(page);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	110	}
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	111	}
				112
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	113	/*
				114	* Restore a potential migration pte to a working pte entry
				115	*/
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	116	static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
				117	unsigned long addr, void *old)
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	118	{
				119	struct mm_struct *mm = vma->vm_mm;
				120	swp_entry_t entry;
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	121	pmd_t *pmd;
				122	pte_t *ptep, pte;
				123	spinlock_t *ptl;
				124
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	125	if (unlikely(PageHuge(new))) {
				126	ptep = huge_pte_offset(mm, addr);
				127	if (!ptep)
				128	goto out;
				129	ptl = &mm->page_table_lock;
				130	} else {
Bob Liu	6219049	2012-12-11 16:00:37 -0800	[diff] [blame]	131	pmd = mm_find_pmd(mm, addr);
				132	if (!pmd)
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	133	goto out;
Andrea Arcangeli	500d65d	2011-01-13 15:46:55 -0800	[diff] [blame]	134	if (pmd_trans_huge(*pmd))
				135	goto out;
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	136
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	137	ptep = pte_offset_map(pmd, addr);
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	138
Hugh Dickins	486cf46	2011-10-19 12:50:35 -0700	[diff] [blame]	139	/*
				140	* Peek to check is_swap_pte() before taking ptlock? No, we
				141	* can race mremap's move_ptes(), which skips anon_vma lock.
				142	*/
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	143
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	144	ptl = pte_lockptr(mm, pmd);
				145	}
				146
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	147	spin_lock(ptl);
				148	pte = *ptep;
				149	if (!is_swap_pte(pte))
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	150	goto unlock;
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	151
				152	entry = pte_to_swp_entry(pte);
				153
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	154	if (!is_migration_entry(entry) ||
				155	migration_entry_to_page(entry) != old)
				156	goto unlock;
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	157
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	158	get_page(new);
				159	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
				160	if (is_write_migration_entry(entry))
				161	pte = pte_mkwrite(pte);
Andi Kleen	3ef8fd7	2010-10-11 16:03:21 +0200	[diff] [blame]	162	#ifdef CONFIG_HUGETLB_PAGE
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	163	if (PageHuge(new))
				164	pte = pte_mkhuge(pte);
Andi Kleen	3ef8fd7	2010-10-11 16:03:21 +0200	[diff] [blame]	165	#endif
KAMEZAWA Hiroyuki	97ee052	2007-10-16 01:25:43 -0700	[diff] [blame]	166	flush_cache_page(vma, addr, pte_pfn(pte));
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	167	set_pte_at(mm, addr, ptep, pte);
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	168
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	169	if (PageHuge(new)) {
				170	if (PageAnon(new))
				171	hugepage_add_anon_rmap(new, vma, addr);
				172	else
				173	page_dup_rmap(new);
				174	} else if (PageAnon(new))
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	175	page_add_anon_rmap(new, vma, addr);
				176	else
				177	page_add_file_rmap(new);
				178
				179	/* No need to invalidate - it was non-present before */
Russell King	4b3073e	2009-12-18 16:40:18 +0000	[diff] [blame]	180	update_mmu_cache(vma, addr, ptep);
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	181	unlock:
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	182	pte_unmap_unlock(ptep, ptl);
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	183	out:
				184	return SWAP_AGAIN;
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	185	}
				186
				187	/*
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	188	* Get rid of all migration entries and replace them by
				189	* references to the indicated page.
				190	*/
				191	static void remove_migration_ptes(struct page *old, struct page *new)
				192	{
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	193	rmap_walk(new, remove_migration_pte, old);
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	194	}
				195
				196	/*
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	197	* Something used the pte of a page under migration. We need to
				198	* get to the page and wait until migration is finished.
				199	* When we return from this function the fault will be retried.
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	200	*/
				201	void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				202	unsigned long address)
				203	{
				204	pte_t *ptep, pte;
				205	spinlock_t *ptl;
				206	swp_entry_t entry;
				207	struct page *page;
				208
				209	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				210	pte = *ptep;
				211	if (!is_swap_pte(pte))
				212	goto out;
				213
				214	entry = pte_to_swp_entry(pte);
				215	if (!is_migration_entry(entry))
				216	goto out;
				217
				218	page = migration_entry_to_page(entry);
				219
Nick Piggin	e286781	2008-07-25 19:45:30 -0700	[diff] [blame]	220	/*
				221	* Once radix-tree replacement of page migration started, page_count
				222	* must be zero. And, we don't want to call wait_on_page_locked()
				223	* against a page without get_page().
				224	* So we use get_page_unless_zero() here. Even if that fails, the page
				225	* fault will simply occur again.
				226	*/
				227	if (!get_page_unless_zero(page))
				228	goto out;
Christoph Lameter	0697212	2006-06-23 02:03:35 -0700	[diff] [blame]	229	pte_unmap_unlock(ptep, ptl);
				230	wait_on_page_locked(page);
				231	put_page(page);
				232	return;
				233	out:
				234	pte_unmap_unlock(ptep, ptl);
				235	}
				236
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	237	#ifdef CONFIG_BLOCK
				238	/* Returns true if all buffers are successfully locked */
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	239	static bool buffer_migrate_lock_buffers(struct buffer_head *head,
				240	enum migrate_mode mode)
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	241	{
				242	struct buffer_head *bh = head;
				243
				244	/* Simple case, sync compaction */
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	245	if (mode != MIGRATE_ASYNC) {
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	246	do {
				247	get_bh(bh);
				248	lock_buffer(bh);
				249	bh = bh->b_this_page;
				250
				251	} while (bh != head);
				252
				253	return true;
				254	}
				255
				256	/* async case, we cannot block on lock_buffer so use trylock_buffer */
				257	do {
				258	get_bh(bh);
				259	if (!trylock_buffer(bh)) {
				260	/*
				261	* We failed to lock the buffer and cannot stall in
				262	* async migration. Release the taken locks
				263	*/
				264	struct buffer_head *failed_bh = bh;
				265	put_bh(failed_bh);
				266	bh = head;
				267	while (bh != failed_bh) {
				268	unlock_buffer(bh);
				269	put_bh(bh);
				270	bh = bh->b_this_page;
				271	}
				272	return false;
				273	}
				274
				275	bh = bh->b_this_page;
				276	} while (bh != head);
				277	return true;
				278	}
				279	#else
				280	static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	281	enum migrate_mode mode)
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	282	{
				283	return true;
				284	}
				285	#endif /* CONFIG_BLOCK */
				286
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	287	/*
Christoph Lameter	c3fcf8a	2006-06-23 02:03:32 -0700	[diff] [blame]	288	* Replace the page in the mapping.
Christoph Lameter	5b5c712	2006-06-23 02:03:29 -0700	[diff] [blame]	289	*
				290	* The number of remaining references must be:
				291	* 1 for anonymous pages without a mapping
				292	* 2 for pages with a mapping
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	293	* 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	294	*/
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	295	static int migrate_page_move_mapping(struct address_space *mapping,
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	296	struct page *newpage, struct page *page,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	297	struct buffer_head *head, enum migrate_mode mode)
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	298	{
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	299	int expected_count = 0;
Nick Piggin	7cf9c2c	2006-12-06 20:33:44 -0800	[diff] [blame]	300	void **pslot;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	301
Christoph Lameter	6c5240a	2006-06-23 02:03:37 -0700	[diff] [blame]	302	if (!mapping) {
Christoph Lameter	0e8c7d0	2007-04-23 14:41:09 -0700	[diff] [blame]	303	/* Anonymous page without mapping */
Christoph Lameter	6c5240a	2006-06-23 02:03:37 -0700	[diff] [blame]	304	if (page_count(page) != 1)
				305	return -EAGAIN;
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	306	return MIGRATEPAGE_SUCCESS;
Christoph Lameter	6c5240a	2006-06-23 02:03:37 -0700	[diff] [blame]	307	}
				308
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	309	spin_lock_irq(&mapping->tree_lock);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	310
Nick Piggin	7cf9c2c	2006-12-06 20:33:44 -0800	[diff] [blame]	311	pslot = radix_tree_lookup_slot(&mapping->page_tree,
				312	page_index(page));
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	313
Johannes Weiner	edcf474	2009-09-21 17:02:59 -0700	[diff] [blame]	314	expected_count = 2 + page_has_private(page);
Nick Piggin	e286781	2008-07-25 19:45:30 -0700	[diff] [blame]	315	if (page_count(page) != expected_count ||
Mel Gorman	29c1f67	2011-01-13 15:47:21 -0800	[diff] [blame]	316	radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	317	spin_unlock_irq(&mapping->tree_lock);
Christoph Lameter	e23ca00	2006-04-10 22:52:57 -0700	[diff] [blame]	318	return -EAGAIN;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	319	}
				320
Nick Piggin	e286781	2008-07-25 19:45:30 -0700	[diff] [blame]	321	if (!page_freeze_refs(page, expected_count)) {
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	322	spin_unlock_irq(&mapping->tree_lock);
Nick Piggin	e286781	2008-07-25 19:45:30 -0700	[diff] [blame]	323	return -EAGAIN;
				324	}
				325
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	326	/*
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	327	* In the async migration case of moving a page with buffers, lock the
				328	* buffers using trylock before the mapping is moved. If the mapping
				329	* was moved, we later failed to lock the buffers and could not move
				330	* the mapping back due to an elevated page count, we would have to
				331	* block waiting on other references to be dropped.
				332	*/
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	333	if (mode == MIGRATE_ASYNC && head &&
				334	!buffer_migrate_lock_buffers(head, mode)) {
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	335	page_unfreeze_refs(page, expected_count);
				336	spin_unlock_irq(&mapping->tree_lock);
				337	return -EAGAIN;
				338	}
				339
				340	/*
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	341	* Now we know that no one else is looking at the page.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	342	*/
Nick Piggin	7cf9c2c	2006-12-06 20:33:44 -0800	[diff] [blame]	343	get_page(newpage); /* add cache reference */
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	344	if (PageSwapCache(page)) {
				345	SetPageSwapCache(newpage);
				346	set_page_private(newpage, page_private(page));
				347	}
				348
Nick Piggin	7cf9c2c	2006-12-06 20:33:44 -0800	[diff] [blame]	349	radix_tree_replace_slot(pslot, newpage);
				350
				351	/*
Jacobo Giralt	937a94c	2012-01-10 15:07:11 -0800	[diff] [blame]	352	* Drop cache reference from old page by unfreezing
				353	* to one less reference.
Nick Piggin	7cf9c2c	2006-12-06 20:33:44 -0800	[diff] [blame]	354	* We know this isn't the last reference.
				355	*/
Jacobo Giralt	937a94c	2012-01-10 15:07:11 -0800	[diff] [blame]	356	page_unfreeze_refs(page, expected_count - 1);
Nick Piggin	7cf9c2c	2006-12-06 20:33:44 -0800	[diff] [blame]	357
Christoph Lameter	0e8c7d0	2007-04-23 14:41:09 -0700	[diff] [blame]	358	/*
				359	* If moved to a different zone then also account
				360	* the page for that zone. Other VM counters will be
				361	* taken care of when we establish references to the
				362	* new page and drop references to the old page.
				363	*
				364	* Note that anonymous pages are accounted for
				365	* via NR_FILE_PAGES and NR_ANON_PAGES if they
				366	* are mapped to swap space.
				367	*/
				368	__dec_zone_page_state(page, NR_FILE_PAGES);
				369	__inc_zone_page_state(newpage, NR_FILE_PAGES);
Andrea Arcangeli	99a15e2	2011-06-16 12:56:19 -0700	[diff] [blame]	370	if (!PageSwapCache(page) && PageSwapBacked(page)) {
KOSAKI Motohiro	4b02108	2009-09-21 17:01:33 -0700	[diff] [blame]	371	__dec_zone_page_state(page, NR_SHMEM);
				372	__inc_zone_page_state(newpage, NR_SHMEM);
				373	}
Nick Piggin	19fd623	2008-07-25 19:45:32 -0700	[diff] [blame]	374	spin_unlock_irq(&mapping->tree_lock);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	375
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	376	return MIGRATEPAGE_SUCCESS;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	377	}
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	378
				379	/*
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	380	* The expected number of remaining references is the same as that
				381	* of migrate_page_move_mapping().
				382	*/
				383	int migrate_huge_page_move_mapping(struct address_space *mapping,
				384	struct page *newpage, struct page *page)
				385	{
				386	int expected_count;
				387	void **pslot;
				388
				389	if (!mapping) {
				390	if (page_count(page) != 1)
				391	return -EAGAIN;
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	392	return MIGRATEPAGE_SUCCESS;
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	393	}
				394
				395	spin_lock_irq(&mapping->tree_lock);
				396
				397	pslot = radix_tree_lookup_slot(&mapping->page_tree,
				398	page_index(page));
				399
				400	expected_count = 2 + page_has_private(page);
				401	if (page_count(page) != expected_count ||
Mel Gorman	29c1f67	2011-01-13 15:47:21 -0800	[diff] [blame]	402	radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	403	spin_unlock_irq(&mapping->tree_lock);
				404	return -EAGAIN;
				405	}
				406
				407	if (!page_freeze_refs(page, expected_count)) {
				408	spin_unlock_irq(&mapping->tree_lock);
				409	return -EAGAIN;
				410	}
				411
				412	get_page(newpage);
				413
				414	radix_tree_replace_slot(pslot, newpage);
				415
Jacobo Giralt	937a94c	2012-01-10 15:07:11 -0800	[diff] [blame]	416	page_unfreeze_refs(page, expected_count - 1);
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	417
				418	spin_unlock_irq(&mapping->tree_lock);
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	419	return MIGRATEPAGE_SUCCESS;
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	420	}
				421
				422	/*
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	423	* Copy the page to its new location
				424	*/
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	425	void migrate_page_copy(struct page *newpage, struct page *page)
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	426	{
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	427	if (PageHuge(page) || PageTransHuge(page))
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	428	copy_huge_page(newpage, page);
				429	else
				430	copy_highpage(newpage, page);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	431
				432	if (PageError(page))
				433	SetPageError(newpage);
				434	if (PageReferenced(page))
				435	SetPageReferenced(newpage);
				436	if (PageUptodate(page))
				437	SetPageUptodate(newpage);
Lee Schermerhorn	894bc31	2008-10-18 20:26:39 -0700	[diff] [blame]	438	if (TestClearPageActive(page)) {
				439	VM_BUG_ON(PageUnevictable(page));
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	440	SetPageActive(newpage);
Lee Schermerhorn	418b27e	2009-12-14 17:59:54 -0800	[diff] [blame]	441	} else if (TestClearPageUnevictable(page))
				442	SetPageUnevictable(newpage);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	443	if (PageChecked(page))
				444	SetPageChecked(newpage);
				445	if (PageMappedToDisk(page))
				446	SetPageMappedToDisk(newpage);
				447
				448	if (PageDirty(page)) {
				449	clear_page_dirty_for_io(page);
Nick Piggin	3a902c5	2008-04-30 00:55:16 -0700	[diff] [blame]	450	/*
				451	* Want to mark the page and the radix tree as dirty, and
				452	* redo the accounting that clear_page_dirty_for_io undid,
				453	* but we can't use set_page_dirty because that function
				454	* is actually a signal that all of the page has become dirty.
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	455	* Whereas only part of our page may be dirty.
Nick Piggin	3a902c5	2008-04-30 00:55:16 -0700	[diff] [blame]	456	*/
Hugh Dickins	752dc18	2012-06-02 00:27:47 -0700	[diff] [blame]	457	if (PageSwapBacked(page))
				458	SetPageDirty(newpage);
				459	else
				460	__set_page_dirty_nobuffers(newpage);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	461	}
				462
Nick Piggin	b291f00	2008-10-18 20:26:44 -0700	[diff] [blame]	463	mlock_migrate_page(newpage, page);
Hugh Dickins	e9995ef	2009-12-14 17:59:31 -0800	[diff] [blame]	464	ksm_migrate_page(newpage, page);
Nick Piggin	b291f00	2008-10-18 20:26:44 -0700	[diff] [blame]	465
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	466	ClearPageSwapCache(page);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	467	ClearPagePrivate(page);
				468	set_page_private(page, 0);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	469
				470	/*
				471	* If any waiters have accumulated on the new page then
				472	* wake them up.
				473	*/
				474	if (PageWriteback(newpage))
				475	end_page_writeback(newpage);
				476	}
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	477
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	478	/************************************************************
				479	* Migration functions
				480	***********************************************************/
				481
				482	/* Always fail migration. Used for mappings that are not movable */
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	483	int fail_migrate_page(struct address_space *mapping,
				484	struct page *newpage, struct page *page)
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	485	{
				486	return -EIO;
				487	}
				488	EXPORT_SYMBOL(fail_migrate_page);
				489
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	490	/*
				491	* Common logic to directly migrate a single page suitable for
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	492	* pages that do not use PagePrivate/PagePrivate2.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	493	*
				494	* Pages are locked upon entry and exit.
				495	*/
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	496	int migrate_page(struct address_space *mapping,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	497	struct page *newpage, struct page *page,
				498	enum migrate_mode mode)
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	499	{
				500	int rc;
				501
				502	BUG_ON(PageWriteback(page)); /* Writeback must be complete */
				503
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	504	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	505
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	506	if (rc != MIGRATEPAGE_SUCCESS)
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	507	return rc;
				508
				509	migrate_page_copy(newpage, page);
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	510	return MIGRATEPAGE_SUCCESS;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	511	}
				512	EXPORT_SYMBOL(migrate_page);
				513
David Howells	9361401	2006-09-30 20:45:40 +0200	[diff] [blame]	514	#ifdef CONFIG_BLOCK
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	515	/*
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	516	* Migration function for pages with buffers. This function can only be used
				517	* if the underlying filesystem guarantees that no other references to "page"
				518	* exist.
				519	*/
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	520	int buffer_migrate_page(struct address_space *mapping,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	521	struct page *newpage, struct page *page, enum migrate_mode mode)
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	522	{
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	523	struct buffer_head *bh, *head;
				524	int rc;
				525
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	526	if (!page_has_buffers(page))
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	527	return migrate_page(mapping, newpage, page, mode);
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	528
				529	head = page_buffers(page);
				530
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	531	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	532
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	533	if (rc != MIGRATEPAGE_SUCCESS)
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	534	return rc;
				535
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	536	/*
				537	* In the async case, migrate_page_move_mapping locked the buffers
				538	* with an IRQ-safe spinlock held. In the sync case, the buffers
				539	* need to be locked now
				540	*/
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	541	if (mode != MIGRATE_ASYNC)
				542	BUG_ON(!buffer_migrate_lock_buffers(head, mode));
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	543
				544	ClearPagePrivate(page);
				545	set_page_private(newpage, page_private(page));
				546	set_page_private(page, 0);
				547	put_page(page);
				548	get_page(newpage);
				549
				550	bh = head;
				551	do {
				552	set_bh_page(bh, newpage, bh_offset(bh));
				553	bh = bh->b_this_page;
				554
				555	} while (bh != head);
				556
				557	SetPagePrivate(newpage);
				558
				559	migrate_page_copy(newpage, page);
				560
				561	bh = head;
				562	do {
				563	unlock_buffer(bh);
				564	put_bh(bh);
				565	bh = bh->b_this_page;
				566
				567	} while (bh != head);
				568
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	569	return MIGRATEPAGE_SUCCESS;
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	570	}
				571	EXPORT_SYMBOL(buffer_migrate_page);
David Howells	9361401	2006-09-30 20:45:40 +0200	[diff] [blame]	572	#endif
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	573
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	574	/*
				575	* Writeback a page to clean the dirty state
				576	*/
				577	static int writeout(struct address_space *mapping, struct page *page)
				578	{
				579	struct writeback_control wbc = {
				580	.sync_mode = WB_SYNC_NONE,
				581	.nr_to_write = 1,
				582	.range_start = 0,
				583	.range_end = LLONG_MAX,
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	584	.for_reclaim = 1
				585	};
				586	int rc;
				587
				588	if (!mapping->a_ops->writepage)
				589	/* No write method for the address space */
				590	return -EINVAL;
				591
				592	if (!clear_page_dirty_for_io(page))
				593	/* Someone else already triggered a write */
				594	return -EAGAIN;
				595
				596	/*
				597	* A dirty page may imply that the underlying filesystem has
				598	* the page on some queue. So the page must be clean for
				599	* migration. Writeout may mean we lose the lock and the
				600	* page state is no longer what we checked for earlier.
				601	* At this point we know that the migration attempt cannot
				602	* be successful.
				603	*/
				604	remove_migration_ptes(page, page);
				605
				606	rc = mapping->a_ops->writepage(page, &wbc);
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	607
				608	if (rc != AOP_WRITEPAGE_ACTIVATE)
				609	/* unlocked. Relock */
				610	lock_page(page);
				611
Hugh Dickins	bda8550	2008-11-19 15:36:36 -0800	[diff] [blame]	612	return (rc < 0) ? -EIO : -EAGAIN;
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	613	}
				614
				615	/*
				616	* Default handling if a filesystem does not provide a migration function.
				617	*/
Christoph Lameter	8351a6e	2006-06-23 02:03:33 -0700	[diff] [blame]	618	static int fallback_migrate_page(struct address_space *mapping,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	619	struct page newpage, struct page page, enum migrate_mode mode)
Christoph Lameter	8351a6e	2006-06-23 02:03:33 -0700	[diff] [blame]	620	{
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	621	if (PageDirty(page)) {
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	622	/* Only writeback pages in full synchronous migration */
				623	if (mode != MIGRATE_SYNC)
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	624	return -EBUSY;
Christoph Lameter	04e62a2	2006-06-23 02:03:38 -0700	[diff] [blame]	625	return writeout(mapping, page);
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	626	}
Christoph Lameter	8351a6e	2006-06-23 02:03:33 -0700	[diff] [blame]	627
				628	/*
				629	* Buffers may be managed in a filesystem specific way.
				630	* We must have no buffers or drop them.
				631	*/
David Howells	266cf65	2009-04-03 16:42:36 +0100	[diff] [blame]	632	if (page_has_private(page) &&
Christoph Lameter	8351a6e	2006-06-23 02:03:33 -0700	[diff] [blame]	633	!try_to_release_page(page, GFP_KERNEL))
				634	return -EAGAIN;
				635
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	636	return migrate_page(mapping, newpage, page, mode);
Christoph Lameter	8351a6e	2006-06-23 02:03:33 -0700	[diff] [blame]	637	}
				638
Christoph Lameter	1d8b85c	2006-06-23 02:03:28 -0700	[diff] [blame]	639	/*
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	640	* Move a page to a newly allocated page
				641	* The page is locked and all ptes have been successfully removed.
				642	*
				643	* The new page will have replaced the old page if this function
				644	* is successful.
Lee Schermerhorn	894bc31	2008-10-18 20:26:39 -0700	[diff] [blame]	645	*
				646	* Return value:
				647	* < 0 - error code
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	648	* MIGRATEPAGE_SUCCESS - success
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	649	*/
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	650	static int move_to_new_page(struct page newpage, struct page page,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	651	int remap_swapcache, enum migrate_mode mode)
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	652	{
				653	struct address_space *mapping;
				654	int rc;
				655
				656	/*
				657	* Block others from accessing the page when we get around to
				658	* establishing additional references. We are the only one
				659	* holding a reference to the new page at this point.
				660	*/
Nick Piggin	529ae9a	2008-08-02 12:01:03 +0200	[diff] [blame]	661	if (!trylock_page(newpage))
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	662	BUG();
				663
				664	/* Prepare mapping for the new page.*/
				665	newpage->index = page->index;
				666	newpage->mapping = page->mapping;
Rik van Riel	b2e1853	2008-10-18 20:26:30 -0700	[diff] [blame]	667	if (PageSwapBacked(page))
				668	SetPageSwapBacked(newpage);
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	669
				670	mapping = page_mapping(page);
				671	if (!mapping)
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	672	rc = migrate_page(mapping, newpage, page, mode);
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	673	else if (mapping->a_ops->migratepage)
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	674	/*
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	675	* Most pages have a mapping and most filesystems provide a
				676	* migratepage callback. Anonymous pages are part of swap
				677	* space which also has its own migratepage callback. This
				678	* is the most common path for page migration.
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	679	*/
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	680	rc = mapping->a_ops->migratepage(mapping,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	681	newpage, page, mode);
Mel Gorman	b969c4a	2012-01-12 17:19:34 -0800	[diff] [blame]	682	else
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	683	rc = fallback_migrate_page(mapping, newpage, page, mode);
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	684
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	685	if (rc != MIGRATEPAGE_SUCCESS) {
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	686	newpage->mapping = NULL;
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	687	} else {
				688	if (remap_swapcache)
				689	remove_migration_ptes(page, newpage);
Konstantin Khlebnikov	35512ec	2012-02-03 15:37:13 -0800	[diff] [blame]	690	page->mapping = NULL;
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	691	}
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	692
				693	unlock_page(newpage);
				694
				695	return rc;
				696	}
				697
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	698	static int __unmap_and_move(struct page page, struct page newpage,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	699	int force, bool offlining, enum migrate_mode mode)
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	700	{
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	701	int rc = -EAGAIN;
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	702	int remap_swapcache = 1;
KAMEZAWA Hiroyuki	56039ef	2011-03-23 16:42:19 -0700	[diff] [blame]	703	struct mem_cgroup *mem;
Mel Gorman	3f6c827	2010-05-24 14:32:17 -0700	[diff] [blame]	704	struct anon_vma *anon_vma = NULL;
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	705
Nick Piggin	529ae9a	2008-08-02 12:01:03 +0200	[diff] [blame]	706	if (!trylock_page(page)) {
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	707	if (!force \|\| mode == MIGRATE_ASYNC)
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	708	goto out;
Mel Gorman	3e7d344	2011-01-13 15:45:56 -0800	[diff] [blame]	709
				710	/*
				711	* It's not safe for direct compaction to call lock_page.
				712	* For example, during page readahead pages are added locked
				713	* to the LRU. Later, when the IO completes the pages are
				714	* marked uptodate and unlocked. However, the queueing
				715	* could be merging multiple pages for one bio (e.g.
				716	* mpage_readpages). If an allocation happens for the
				717	* second or third page, the process can end up locking
				718	* the same page twice and deadlocking. Rather than
				719	* trying to be clever about what pages can be locked,
				720	* avoid the use of lock_page for direct compaction
				721	* altogether.
				722	*/
				723	if (current->flags & PF_MEMALLOC)
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	724	goto out;
Mel Gorman	3e7d344	2011-01-13 15:45:56 -0800	[diff] [blame]	725
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	726	lock_page(page);
				727	}
				728
Hugh Dickins	62b61f6	2009-12-14 17:59:33 -0800	[diff] [blame]	729	/*
				730	* Only memory hotplug's offline_pages() caller has locked out KSM,
				731	* and can safely migrate a KSM page. The other cases have skipped
				732	* PageKsm along with PageReserved - but it is only now when we have
				733	* the page lock that we can be certain it will not go KSM beneath us
				734	* (KSM will not upgrade a page from PageAnon to PageKsm when it sees
				735	* its pagecount raised, but only here do we take the page lock which
				736	* serializes that).
				737	*/
				738	if (PageKsm(page) && !offlining) {
				739	rc = -EBUSY;
				740	goto unlock;
				741	}
				742
KAMEZAWA Hiroyuki	01b1ae6	2009-01-07 18:07:50 -0800	[diff] [blame]	743	/* charge against new page */
Johannes Weiner	0030f53	2012-07-31 16:45:25 -0700	[diff] [blame]	744	mem_cgroup_prepare_migration(page, newpage, &mem);
KAMEZAWA Hiroyuki	01b1ae6	2009-01-07 18:07:50 -0800	[diff] [blame]	745
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	746	if (PageWriteback(page)) {
Andrea Arcangeli	11bc82d	2011-03-22 16:33:11 -0700	[diff] [blame]	747	/*
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	748	* Only in the case of a full syncronous migration is it
				749	* necessary to wait for PageWriteback. In the async case,
				750	* the retry loop is too short and in the sync-light case,
				751	* the overhead of stalling is too much
Andrea Arcangeli	11bc82d	2011-03-22 16:33:11 -0700	[diff] [blame]	752	*/
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	753	if (mode != MIGRATE_SYNC) {
Andrea Arcangeli	11bc82d	2011-03-22 16:33:11 -0700	[diff] [blame]	754	rc = -EBUSY;
				755	goto uncharge;
				756	}
				757	if (!force)
KAMEZAWA Hiroyuki	01b1ae6	2009-01-07 18:07:50 -0800	[diff] [blame]	758	goto uncharge;
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	759	wait_on_page_writeback(page);
				760	}
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	761	/*
KAMEZAWA Hiroyuki	dc386d4	2007-07-26 10:41:07 -0700	[diff] [blame]	762	* By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
				763	* we cannot notice that anon_vma is freed while we migrates a page.
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	764	* This get_anon_vma() delays freeing anon_vma pointer until the end
KAMEZAWA Hiroyuki	dc386d4	2007-07-26 10:41:07 -0700	[diff] [blame]	765	* of migration. File cache pages are no problem because of page_lock()
KAMEZAWA Hiroyuki	989f89c	2007-08-30 23:56:21 -0700	[diff] [blame]	766	* File Caches may use write_page() or lock_page() in migration, then,
				767	* just care Anon page here.
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	768	*/
KAMEZAWA Hiroyuki	989f89c	2007-08-30 23:56:21 -0700	[diff] [blame]	769	if (PageAnon(page)) {
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	770	/*
Ingo Molnar	4fc3f1d	2012-12-02 19:56:50 +0000	[diff] [blame]	771	* Only page_lock_anon_vma_read() understands the subtleties of
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	772	* getting a hold on an anon_vma from outside one of its mms.
				773	*/
Peter Zijlstra	746b18d	2011-05-24 17:12:10 -0700	[diff] [blame]	774	anon_vma = page_get_anon_vma(page);
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	775	if (anon_vma) {
				776	/*
Peter Zijlstra	746b18d	2011-05-24 17:12:10 -0700	[diff] [blame]	777	* Anon page
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	778	*/
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	779	} else if (PageSwapCache(page)) {
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	780	/*
				781	* We cannot be sure that the anon_vma of an unmapped
				782	* swapcache page is safe to use because we don't
				783	* know in advance if the VMA that this page belonged
				784	* to still exists. If the VMA and others sharing the
				785	* data have been freed, then the anon_vma could
				786	* already be invalid.
				787	*
				788	* To avoid this possibility, swapcache pages get
				789	* migrated but are not remapped when migration
				790	* completes
				791	*/
				792	remap_swapcache = 0;
				793	} else {
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	794	goto uncharge;
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	795	}
KAMEZAWA Hiroyuki	989f89c	2007-08-30 23:56:21 -0700	[diff] [blame]	796	}
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	797
Rafael Aquini	bf6bddf	2012-12-11 16:02:42 -0800	[diff] [blame]	798	if (unlikely(balloon_page_movable(page))) {
				799	/*
				800	* A ballooned page does not need any special attention from
				801	* physical to virtual reverse mapping procedures.
				802	* Skip any attempt to unmap PTEs or to remap swap cache,
				803	* in order to avoid burning cycles at rmap level, and perform
				804	* the page migration right away (proteced by page lock).
				805	*/
				806	rc = balloon_page_migrate(newpage, page, mode);
				807	goto uncharge;
				808	}
				809
KAMEZAWA Hiroyuki	dc386d4	2007-07-26 10:41:07 -0700	[diff] [blame]	810	/*
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	811	* Corner case handling:
				812	* 1. When a new swap-cache page is read into, it is added to the LRU
				813	* and treated as swapcache but it has no rmap yet.
				814	* Calling try_to_unmap() against a page->mapping==NULL page will
				815	* trigger a BUG. So handle it here.
				816	* 2. An orphaned page (see truncate_complete_page) might have
				817	* fs-private metadata. The page can be picked up due to memory
				818	* offlining. Everywhere else except page reclaim, the page is
				819	* invisible to the vm, so the page can not be migrated. So try to
				820	* free the metadata, so the page can be freed.
KAMEZAWA Hiroyuki	dc386d4	2007-07-26 10:41:07 -0700	[diff] [blame]	821	*/
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	822	if (!page->mapping) {
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	823	VM_BUG_ON(PageAnon(page));
				824	if (page_has_private(page)) {
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	825	try_to_free_buffers(page);
Hugh Dickins	1ce82b6	2011-01-13 15:47:30 -0800	[diff] [blame]	826	goto uncharge;
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	827	}
Shaohua Li	abfc348	2009-09-21 17:01:19 -0700	[diff] [blame]	828	goto skip_unmap;
Shaohua Li	62e1c55	2008-02-04 22:29:33 -0800	[diff] [blame]	829	}
				830
KAMEZAWA Hiroyuki	dc386d4	2007-07-26 10:41:07 -0700	[diff] [blame]	831	/* Establish migration ptes or remove ptes */
Andi Kleen	14fa31b	2009-09-16 11:50:10 +0200	[diff] [blame]	832	try_to_unmap(page, TTU_MIGRATION\|TTU_IGNORE_MLOCK\|TTU_IGNORE_ACCESS);
KAMEZAWA Hiroyuki	dc386d4	2007-07-26 10:41:07 -0700	[diff] [blame]	833
Shaohua Li	abfc348	2009-09-21 17:01:19 -0700	[diff] [blame]	834	skip_unmap:
Christoph Lameter	e6a1530	2006-06-25 05:46:49 -0700	[diff] [blame]	835	if (!page_mapped(page))
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	836	rc = move_to_new_page(newpage, page, remap_swapcache, mode);
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	837
Mel Gorman	3fe2011	2010-05-24 14:32:20 -0700	[diff] [blame]	838	if (rc && remap_swapcache)
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	839	remove_migration_ptes(page, page);
Mel Gorman	3f6c827	2010-05-24 14:32:17 -0700	[diff] [blame]	840
				841	/* Drop an anon_vma reference if we took one */
Rik van Riel	7654506	2010-08-09 17:18:41 -0700	[diff] [blame]	842	if (anon_vma)
Peter Zijlstra	9e60109	2011-03-22 16:32:46 -0700	[diff] [blame]	843	put_anon_vma(anon_vma);
Mel Gorman	3f6c827	2010-05-24 14:32:17 -0700	[diff] [blame]	844
KAMEZAWA Hiroyuki	01b1ae6	2009-01-07 18:07:50 -0800	[diff] [blame]	845	uncharge:
Rafael Aquini	bf6bddf	2012-12-11 16:02:42 -0800	[diff] [blame]	846	mem_cgroup_end_migration(mem, page, newpage,
				847	(rc == MIGRATEPAGE_SUCCESS \|\|
				848	rc == MIGRATEPAGE_BALLOON_SUCCESS));
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	849	unlock:
				850	unlock_page(page);
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	851	out:
				852	return rc;
				853	}
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	854
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	855	/*
				856	* Obtain the lock on page, remove all ptes and migrate the page
				857	* to the newly allocated page in newpage.
				858	*/
				859	static int unmap_and_move(new_page_t get_new_page, unsigned long private,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	860	struct page *page, int force, bool offlining,
				861	enum migrate_mode mode)
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	862	{
				863	int rc = 0;
				864	int *result = NULL;
				865	struct page *newpage = get_new_page(page, private, &result);
				866
				867	if (!newpage)
				868	return -ENOMEM;
				869
				870	if (page_count(page) == 1) {
				871	/* page was freed from under us. So we are done. */
				872	goto out;
				873	}
				874
				875	if (unlikely(PageTransHuge(page)))
				876	if (unlikely(split_huge_page(page)))
				877	goto out;
				878
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	879	rc = __unmap_and_move(page, newpage, force, offlining, mode);
Rafael Aquini	bf6bddf	2012-12-11 16:02:42 -0800	[diff] [blame]	880
				881	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
				882	/*
				883	* A ballooned page has been migrated already.
				884	* Now, it's the time to wrap-up counters,
				885	* handle the page back to Buddy and return.
				886	*/
				887	dec_zone_page_state(page, NR_ISOLATED_ANON +
				888	page_is_file_cache(page));
				889	balloon_page_free(page);
				890	return MIGRATEPAGE_SUCCESS;
				891	}
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	892	out:
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	893	if (rc != -EAGAIN) {
Minchan Kim	0dabec9	2011-10-31 17:06:57 -0700	[diff] [blame]	894	/*
				895	* A page that has been migrated has all references
				896	* removed and will be freed. A page that has not been
				897	* migrated will have kepts its references and be
				898	* restored.
				899	*/
				900	list_del(&page->lru);
KOSAKI Motohiro	a731286	2009-09-21 17:01:37 -0700	[diff] [blame]	901	dec_zone_page_state(page, NR_ISOLATED_ANON +
Johannes Weiner	6c0b135	2009-09-21 17:02:59 -0700	[diff] [blame]	902	page_is_file_cache(page));
Lee Schermerhorn	894bc31	2008-10-18 20:26:39 -0700	[diff] [blame]	903	putback_lru_page(page);
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	904	}
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	905	/*
				906	* Move the new page to the LRU. If migration was not successful
				907	* then this will free the page.
				908	*/
Lee Schermerhorn	894bc31	2008-10-18 20:26:39 -0700	[diff] [blame]	909	putback_lru_page(newpage);
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	910	if (result) {
				911	if (rc)
				912	*result = rc;
				913	else
				914	*result = page_to_nid(newpage);
				915	}
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	916	return rc;
				917	}
				918
				919	/*
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	920	* Counterpart of unmap_and_move_page() for hugepage migration.
				921	*
				922	* This function doesn't wait the completion of hugepage I/O
				923	* because there is no race between I/O and migration for hugepage.
				924	* Note that currently hugepage I/O occurs only in direct I/O
				925	* where no lock is held and PG_writeback is irrelevant,
				926	* and writeback status of all subpages are counted in the reference
				927	* count of the head page (i.e. if all subpages of a 2MB hugepage are
				928	* under direct I/O, the reference of the head page is 512 and a bit more.)
				929	* This means that when we try to migrate hugepage whose subpages are
				930	* doing direct I/O, some references remain after try_to_unmap() and
				931	* hugepage migration fails without data corruption.
				932	*
				933	* There is also no race when direct I/O is issued on the page under migration,
				934	* because then pte is replaced with migration swap entry and direct I/O code
				935	* will wait in the page fault for migration to complete.
				936	*/
				937	static int unmap_and_move_huge_page(new_page_t get_new_page,
				938	unsigned long private, struct page *hpage,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	939	int force, bool offlining,
				940	enum migrate_mode mode)
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	941	{
				942	int rc = 0;
				943	int *result = NULL;
				944	struct page *new_hpage = get_new_page(hpage, private, &result);
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	945	struct anon_vma *anon_vma = NULL;
				946
				947	if (!new_hpage)
				948	return -ENOMEM;
				949
				950	rc = -EAGAIN;
				951
				952	if (!trylock_page(hpage)) {
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	953	if (!force \|\| mode != MIGRATE_SYNC)
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	954	goto out;
				955	lock_page(hpage);
				956	}
				957
Peter Zijlstra	746b18d	2011-05-24 17:12:10 -0700	[diff] [blame]	958	if (PageAnon(hpage))
				959	anon_vma = page_get_anon_vma(hpage);
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	960
				961	try_to_unmap(hpage, TTU_MIGRATION\|TTU_IGNORE_MLOCK\|TTU_IGNORE_ACCESS);
				962
				963	if (!page_mapped(hpage))
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	964	rc = move_to_new_page(new_hpage, hpage, 1, mode);
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	965
				966	if (rc)
				967	remove_migration_ptes(hpage, hpage);
				968
Hugh Dickins	fd4a466	2011-01-13 15:47:31 -0800	[diff] [blame]	969	if (anon_vma)
Peter Zijlstra	9e60109	2011-03-22 16:32:46 -0700	[diff] [blame]	970	put_anon_vma(anon_vma);
Aneesh Kumar K.V	8e6ac7f	2012-07-31 16:42:27 -0700	[diff] [blame]	971
				972	if (!rc)
				973	hugetlb_cgroup_migrate(hpage, new_hpage);
				974
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	975	unlock_page(hpage);
Hillf Danton	0976133	2011-12-08 14:34:20 -0800	[diff] [blame]	976	out:
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	977	put_page(new_hpage);
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	978	if (result) {
				979	if (rc)
				980	*result = rc;
				981	else
				982	*result = page_to_nid(new_hpage);
				983	}
				984	return rc;
				985	}
				986
				987	/*
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	988	* migrate_pages
				989	*
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	990	* The function takes one list of pages to migrate and a function
				991	* that determines from the page to be migrated and the private data
				992	* the target of the move and allocates the page.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	993	*
				994	* The function returns after 10 attempts or if no pages
				995	* are movable anymore because to has become empty
Minchan Kim	cf608ac	2010-10-26 14:21:29 -0700	[diff] [blame]	996	* or no retryable pages exist anymore.
				997	* Caller should call putback_lru_pages to return pages to the LRU
Minchan Kim	28bd657	2011-01-25 15:07:26 -0800	[diff] [blame]	998	* or free list only if ret != 0.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	999	*
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	1000	* Return: Number of pages not migrated or error code.
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1001	*/
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	1002	int migrate_pages(struct list_head *from,
Mel Gorman	7f0f249	2011-01-13 15:45:58 -0800	[diff] [blame]	1003	new_page_t get_new_page, unsigned long private, bool offlining,
Mel Gorman	7b2a2d4	2012-10-19 14:07:31 +0100	[diff] [blame]	1004	enum migrate_mode mode, int reason)
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1005	{
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1006	int retry = 1;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1007	int nr_failed = 0;
Mel Gorman	5647bc2	2012-10-19 10:46:20 +0100	[diff] [blame]	1008	int nr_succeeded = 0;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1009	int pass = 0;
				1010	struct page *page;
				1011	struct page *page2;
				1012	int swapwrite = current->flags & PF_SWAPWRITE;
				1013	int rc;
				1014
				1015	if (!swapwrite)
				1016	current->flags \|= PF_SWAPWRITE;
				1017
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1018	for(pass = 0; pass < 10 && retry; pass++) {
				1019	retry = 0;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1020
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1021	list_for_each_entry_safe(page, page2, from, lru) {
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1022	cond_resched();
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1023
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	1024	rc = unmap_and_move(get_new_page, private,
Mel Gorman	77f1fe6	2011-01-13 15:45:57 -0800	[diff] [blame]	1025	page, pass > 2, offlining,
Mel Gorman	a6bc32b	2012-01-12 17:19:43 -0800	[diff] [blame]	1026	mode);
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1027
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1028	switch(rc) {
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	1029	case -ENOMEM:
				1030	goto out;
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1031	case -EAGAIN:
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	1032	retry++;
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1033	break;
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	1034	case MIGRATEPAGE_SUCCESS:
Mel Gorman	5647bc2	2012-10-19 10:46:20 +0100	[diff] [blame]	1035	nr_succeeded++;
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1036	break;
				1037	default:
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	1038	/* Permanent failure */
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	1039	nr_failed++;
Christoph Lameter	e24f0b8	2006-06-23 02:03:51 -0700	[diff] [blame]	1040	break;
Christoph Lameter	2d1db3b	2006-06-23 02:03:33 -0700	[diff] [blame]	1041	}
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1042	}
				1043	}
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	1044	rc = nr_failed + retry;
Christoph Lameter	95a402c	2006-06-23 02:03:53 -0700	[diff] [blame]	1045	out:
Mel Gorman	5647bc2	2012-10-19 10:46:20 +0100	[diff] [blame]	1046	if (nr_succeeded)
				1047	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
				1048	if (nr_failed)
				1049	count_vm_events(PGMIGRATE_FAIL, nr_failed);
Mel Gorman	7b2a2d4	2012-10-19 14:07:31 +0100	[diff] [blame]	1050	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
				1051
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1052	if (!swapwrite)
				1053	current->flags &= ~PF_SWAPWRITE;
				1054
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	1055	return rc;
Christoph Lameter	b20a350	2006-03-22 00:09:12 -0800	[diff] [blame]	1056	}
				1057
Aneesh Kumar K.V	189ebff	2012-07-31 16:42:06 -0700	[diff] [blame]	1058	int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
				1059	unsigned long private, bool offlining,
				1060	enum migrate_mode mode)
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	1061	{
Aneesh Kumar K.V	189ebff	2012-07-31 16:42:06 -0700	[diff] [blame]	1062	int pass, rc;
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	1063
Aneesh Kumar K.V	189ebff	2012-07-31 16:42:06 -0700	[diff] [blame]	1064	for (pass = 0; pass < 10; pass++) {
				1065	rc = unmap_and_move_huge_page(get_new_page,
				1066	private, hpage, pass > 2, offlining,
				1067	mode);
				1068	switch (rc) {
				1069	case -ENOMEM:
				1070	goto out;
				1071	case -EAGAIN:
				1072	/* try again */
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	1073	cond_resched();
Aneesh Kumar K.V	189ebff	2012-07-31 16:42:06 -0700	[diff] [blame]	1074	break;
Rafael Aquini	78bd520	2012-12-11 16:02:31 -0800	[diff] [blame]	1075	case MIGRATEPAGE_SUCCESS:
Aneesh Kumar K.V	189ebff	2012-07-31 16:42:06 -0700	[diff] [blame]	1076	goto out;
				1077	default:
				1078	rc = -EIO;
				1079	goto out;
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	1080	}
				1081	}
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	1082	out:
Aneesh Kumar K.V	189ebff	2012-07-31 16:42:06 -0700	[diff] [blame]	1083	return rc;
Naoya Horiguchi	290408d	2010-09-08 10:19:35 +0900	[diff] [blame]	1084	}
				1085
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1086	#ifdef CONFIG_NUMA
				1087	/*
				1088	* Move a list of individual pages
				1089	*/
				1090	struct page_to_node {
				1091	unsigned long addr;
				1092	struct page *page;
				1093	int node;
				1094	int status;
				1095	};
				1096
				1097	static struct page new_page_node(struct page p, unsigned long private,
				1098	int **result)
				1099	{
				1100	struct page_to_node pm = (struct page_to_node )private;
				1101
				1102	while (pm->node != MAX_NUMNODES && pm->page != p)
				1103	pm++;
				1104
				1105	if (pm->node == MAX_NUMNODES)
				1106	return NULL;
				1107
				1108	*result = &pm->status;
				1109
Mel Gorman	6484eb3	2009-06-16 15:31:54 -0700	[diff] [blame]	1110	return alloc_pages_exact_node(pm->node,
Mel Gorman	769848c	2007-07-17 04:03:05 -0700	[diff] [blame]	1111	GFP_HIGHUSER_MOVABLE \| GFP_THISNODE, 0);
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1112	}
				1113
				1114	/*
				1115	* Move a set of pages as indicated in the pm array. The addr
				1116	* field must be set to the virtual address of the page to be moved
				1117	* and the node number must contain a valid target node.
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1118	* The pm array ends with node = MAX_NUMNODES.
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1119	*/
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1120	static int do_move_page_to_node_array(struct mm_struct *mm,
				1121	struct page_to_node *pm,
				1122	int migrate_all)
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1123	{
				1124	int err;
				1125	struct page_to_node *pp;
				1126	LIST_HEAD(pagelist);
				1127
				1128	down_read(&mm->mmap_sem);
				1129
				1130	/*
				1131	* Build a list of pages to migrate
				1132	*/
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1133	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
				1134	struct vm_area_struct *vma;
				1135	struct page *page;
				1136
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1137	err = -EFAULT;
				1138	vma = find_vma(mm, pp->addr);
Gleb Natapov	70384dc	2010-10-26 14:22:07 -0700	[diff] [blame]	1139	if (!vma \|\| pp->addr < vma->vm_start \|\| !vma_migratable(vma))
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1140	goto set_status;
				1141
Andrea Arcangeli	500d65d	2011-01-13 15:46:55 -0800	[diff] [blame]	1142	page = follow_page(vma, pp->addr, FOLL_GET\|FOLL_SPLIT);
Linus Torvalds	89f5b7d	2008-06-20 11:18:25 -0700	[diff] [blame]	1143
				1144	err = PTR_ERR(page);
				1145	if (IS_ERR(page))
				1146	goto set_status;
				1147
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1148	err = -ENOENT;
				1149	if (!page)
				1150	goto set_status;
				1151
Hugh Dickins	62b61f6	2009-12-14 17:59:33 -0800	[diff] [blame]	1152	/* Use PageReserved to check for zero page */
				1153	if (PageReserved(page) \|\| PageKsm(page))
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1154	goto put_and_set;
				1155
				1156	pp->page = page;
				1157	err = page_to_nid(page);
				1158
				1159	if (err == pp->node)
				1160	/*
				1161	* Node already in the right place
				1162	*/
				1163	goto put_and_set;
				1164
				1165	err = -EACCES;
				1166	if (page_mapcount(page) > 1 &&
				1167	!migrate_all)
				1168	goto put_and_set;
				1169
Nick Piggin	62695a8	2008-10-18 20:26:09 -0700	[diff] [blame]	1170	err = isolate_lru_page(page);
KOSAKI Motohiro	6d9c285	2009-12-14 17:58:11 -0800	[diff] [blame]	1171	if (!err) {
Nick Piggin	62695a8	2008-10-18 20:26:09 -0700	[diff] [blame]	1172	list_add_tail(&page->lru, &pagelist);
KOSAKI Motohiro	6d9c285	2009-12-14 17:58:11 -0800	[diff] [blame]	1173	inc_zone_page_state(page, NR_ISOLATED_ANON +
				1174	page_is_file_cache(page));
				1175	}
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1176	put_and_set:
				1177	/*
				1178	* Either remove the duplicate refcount from
				1179	* isolate_lru_page() or drop the page ref if it was
				1180	* not isolated.
				1181	*/
				1182	put_page(page);
				1183	set_status:
				1184	pp->status = err;
				1185	}
				1186
Brice Goglin	e78bbfa	2008-10-18 20:27:15 -0700	[diff] [blame]	1187	err = 0;
Minchan Kim	cf608ac	2010-10-26 14:21:29 -0700	[diff] [blame]	1188	if (!list_empty(&pagelist)) {
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1189	err = migrate_pages(&pagelist, new_page_node,
Mel Gorman	7b2a2d4	2012-10-19 14:07:31 +0100	[diff] [blame]	1190	(unsigned long)pm, 0, MIGRATE_SYNC,
				1191	MR_SYSCALL);
Minchan Kim	cf608ac	2010-10-26 14:21:29 -0700	[diff] [blame]	1192	if (err)
				1193	putback_lru_pages(&pagelist);
				1194	}
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1195
				1196	up_read(&mm->mmap_sem);
				1197	return err;
				1198	}
				1199
				1200	/*
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1201	* Migrate an array of page address onto an array of nodes and fill
				1202	* the corresponding array of status.
				1203	*/
Christoph Lameter	3268c63	2012-03-21 16:34:06 -0700	[diff] [blame]	1204	static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1205	unsigned long nr_pages,
				1206	const void __user * __user *pages,
				1207	const int __user *nodes,
				1208	int __user *status, int flags)
				1209	{
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1210	struct page_to_node *pm;
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1211	unsigned long chunk_nr_pages;
				1212	unsigned long chunk_start;
				1213	int err;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1214
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1215	err = -ENOMEM;
				1216	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
				1217	if (!pm)
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1218	goto out;
Brice Goglin	35282a2	2009-06-16 15:32:43 -0700	[diff] [blame]	1219
				1220	migrate_prep();
				1221
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1222	/*
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1223	* Store a chunk of page_to_node array in a page,
				1224	* but keep the last one as a marker
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1225	*/
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1226	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1227
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1228	for (chunk_start = 0;
				1229	chunk_start < nr_pages;
				1230	chunk_start += chunk_nr_pages) {
				1231	int j;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1232
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1233	if (chunk_start + chunk_nr_pages > nr_pages)
				1234	chunk_nr_pages = nr_pages - chunk_start;
				1235
				1236	/* fill the chunk pm with addrs and nodes from user-space */
				1237	for (j = 0; j < chunk_nr_pages; j++) {
				1238	const void __user *p;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1239	int node;
				1240
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1241	err = -EFAULT;
				1242	if (get_user(p, pages + j + chunk_start))
				1243	goto out_pm;
				1244	pm[j].addr = (unsigned long) p;
				1245
				1246	if (get_user(node, nodes + j + chunk_start))
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1247	goto out_pm;
				1248
				1249	err = -ENODEV;
Linus Torvalds	6f5a55f	2010-02-05 16:16:50 -0800	[diff] [blame]	1250	if (node < 0 \|\| node >= MAX_NUMNODES)
				1251	goto out_pm;
				1252
Lai Jiangshan	389162c	2012-12-12 13:51:30 -0800	[diff] [blame]	1253	if (!node_state(node, N_MEMORY))
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1254	goto out_pm;
				1255
				1256	err = -EACCES;
				1257	if (!node_isset(node, task_nodes))
				1258	goto out_pm;
				1259
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1260	pm[j].node = node;
				1261	}
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1262
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1263	/* End marker for this chunk */
				1264	pm[chunk_nr_pages].node = MAX_NUMNODES;
				1265
				1266	/* Migrate this chunk */
				1267	err = do_move_page_to_node_array(mm, pm,
				1268	flags & MPOL_MF_MOVE_ALL);
				1269	if (err < 0)
				1270	goto out_pm;
				1271
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1272	/* Return status information */
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1273	for (j = 0; j < chunk_nr_pages; j++)
				1274	if (put_user(pm[j].status, status + j + chunk_start)) {
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1275	err = -EFAULT;
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1276	goto out_pm;
				1277	}
				1278	}
				1279	err = 0;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1280
				1281	out_pm:
Brice Goglin	3140a22	2009-01-06 14:38:57 -0800	[diff] [blame]	1282	free_page((unsigned long)pm);
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1283	out:
				1284	return err;
				1285	}
				1286
				1287	/*
Brice Goglin	2f007e7	2008-10-18 20:27:16 -0700	[diff] [blame]	1288	* Determine the nodes of an array of pages and store it in an array of status.
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1289	*/
Brice Goglin	80bba12	2008-12-09 13:14:23 -0800	[diff] [blame]	1290	static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
				1291	const void __user *pages, int status)
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1292	{
Brice Goglin	2f007e7	2008-10-18 20:27:16 -0700	[diff] [blame]	1293	unsigned long i;
Brice Goglin	2f007e7	2008-10-18 20:27:16 -0700	[diff] [blame]	1294
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1295	down_read(&mm->mmap_sem);
				1296
Brice Goglin	2f007e7	2008-10-18 20:27:16 -0700	[diff] [blame]	1297	for (i = 0; i < nr_pages; i++) {
Brice Goglin	80bba12	2008-12-09 13:14:23 -0800	[diff] [blame]	1298	unsigned long addr = (unsigned long)(*pages);
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1299	struct vm_area_struct *vma;
				1300	struct page *page;
KOSAKI Motohiro	c095adb	2008-12-16 16:06:43 +0900	[diff] [blame]	1301	int err = -EFAULT;
Brice Goglin	2f007e7	2008-10-18 20:27:16 -0700	[diff] [blame]	1302
				1303	vma = find_vma(mm, addr);
Gleb Natapov	70384dc	2010-10-26 14:22:07 -0700	[diff] [blame]	1304	if (!vma \|\| addr < vma->vm_start)
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1305	goto set_status;
				1306
Brice Goglin	2f007e7	2008-10-18 20:27:16 -0700	[diff] [blame]	1307	page = follow_page(vma, addr, 0);
Linus Torvalds	89f5b7d	2008-06-20 11:18:25 -0700	[diff] [blame]	1308
				1309	err = PTR_ERR(page);
				1310	if (IS_ERR(page))
				1311	goto set_status;
				1312
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1313	err = -ENOENT;
				1314	/* Use PageReserved to check for zero page */
Hugh Dickins	62b61f6	2009-12-14 17:59:33 -0800	[diff] [blame]	1315	if (!page \|\| PageReserved(page) \|\| PageKsm(page))
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1316	goto set_status;
				1317
				1318	err = page_to_nid(page);
				1319	set_status:
Brice Goglin	80bba12	2008-12-09 13:14:23 -0800	[diff] [blame]	1320	*status = err;
				1321
				1322	pages++;
				1323	status++;
				1324	}
				1325
				1326	up_read(&mm->mmap_sem);
				1327	}
				1328
				1329	/*
				1330	* Determine the nodes of a user array of pages and store it in
				1331	* a user array of status.
				1332	*/
				1333	static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
				1334	const void __user * __user *pages,
				1335	int __user *status)
				1336	{
				1337	#define DO_PAGES_STAT_CHUNK_NR 16
				1338	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
				1339	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
Brice Goglin	80bba12	2008-12-09 13:14:23 -0800	[diff] [blame]	1340
H. Peter Anvin	87b8d1a	2010-02-18 16:13:40 -0800	[diff] [blame]	1341	while (nr_pages) {
				1342	unsigned long chunk_nr;
Brice Goglin	80bba12	2008-12-09 13:14:23 -0800	[diff] [blame]	1343
H. Peter Anvin	87b8d1a	2010-02-18 16:13:40 -0800	[diff] [blame]	1344	chunk_nr = nr_pages;
				1345	if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
				1346	chunk_nr = DO_PAGES_STAT_CHUNK_NR;
				1347
				1348	if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
				1349	break;
Brice Goglin	80bba12	2008-12-09 13:14:23 -0800	[diff] [blame]	1350
				1351	do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
				1352
H. Peter Anvin	87b8d1a	2010-02-18 16:13:40 -0800	[diff] [blame]	1353	if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
				1354	break;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1355
H. Peter Anvin	87b8d1a	2010-02-18 16:13:40 -0800	[diff] [blame]	1356	pages += chunk_nr;
				1357	status += chunk_nr;
				1358	nr_pages -= chunk_nr;
				1359	}
				1360	return nr_pages ? -EFAULT : 0;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1361	}
				1362
				1363	/*
				1364	* Move a list of pages in the address space of the currently executing
				1365	* process.
				1366	*/
Heiko Carstens	938bb9f	2009-01-14 14:14:30 +0100	[diff] [blame]	1367	SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
				1368	const void __user * __user *, pages,
				1369	const int __user *, nodes,
				1370	int __user *, status, int, flags)
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1371	{
David Howells	c69e8d9	2008-11-14 10:39:19 +1100	[diff] [blame]	1372	const struct cred cred = current_cred(), tcred;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1373	struct task_struct *task;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1374	struct mm_struct *mm;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1375	int err;
Christoph Lameter	3268c63	2012-03-21 16:34:06 -0700	[diff] [blame]	1376	nodemask_t task_nodes;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1377
				1378	/* Check flags */
				1379	if (flags & ~(MPOL_MF_MOVE\|MPOL_MF_MOVE_ALL))
				1380	return -EINVAL;
				1381
				1382	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
				1383	return -EPERM;
				1384
				1385	/* Find the mm_struct */
Greg Thelen	a879bf5	2011-02-25 14:44:13 -0800	[diff] [blame]	1386	rcu_read_lock();
Pavel Emelyanov	228ebcb	2007-10-18 23:40:16 -0700	[diff] [blame]	1387	task = pid ? find_task_by_vpid(pid) : current;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1388	if (!task) {
Greg Thelen	a879bf5	2011-02-25 14:44:13 -0800	[diff] [blame]	1389	rcu_read_unlock();
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1390	return -ESRCH;
				1391	}
Christoph Lameter	3268c63	2012-03-21 16:34:06 -0700	[diff] [blame]	1392	get_task_struct(task);
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1393
				1394	/*
				1395	* Check if this process has the right to modify the specified
				1396	* process. The right exists if the process has administrative
				1397	* capabilities, superuser privileges or the same
				1398	* userid as the target process.
				1399	*/
David Howells	c69e8d9	2008-11-14 10:39:19 +1100	[diff] [blame]	1400	tcred = __task_cred(task);
Eric W. Biederman	b38a86e	2012-03-12 15:48:24 -0700	[diff] [blame]	1401	if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
				1402	!uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) &&
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1403	!capable(CAP_SYS_NICE)) {
David Howells	c69e8d9	2008-11-14 10:39:19 +1100	[diff] [blame]	1404	rcu_read_unlock();
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1405	err = -EPERM;
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1406	goto out;
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1407	}
David Howells	c69e8d9	2008-11-14 10:39:19 +1100	[diff] [blame]	1408	rcu_read_unlock();
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1409
David Quigley	86c3a76	2006-06-23 02:04:02 -0700	[diff] [blame]	1410	err = security_task_movememory(task);
				1411	if (err)
Brice Goglin	5e9a0f0	2008-10-18 20:27:17 -0700	[diff] [blame]	1412	goto out;
David Quigley	86c3a76	2006-06-23 02:04:02 -0700	[diff] [blame]	1413
Christoph Lameter	3268c63	2012-03-21 16:34:06 -0700	[diff] [blame]	1414	task_nodes = cpuset_mems_allowed(task);
				1415	mm = get_task_mm(task);
				1416	put_task_struct(task);
				1417
Sasha Levin	6e8b09e	2012-04-25 16:01:53 -0700	[diff] [blame]	1418	if (!mm)
				1419	return -EINVAL;
				1420
				1421	if (nodes)
				1422	err = do_pages_move(mm, task_nodes, nr_pages, pages,
				1423	nodes, status, flags);
				1424	else
				1425	err = do_pages_stat(mm, nr_pages, pages, status);
Christoph Lameter	3268c63	2012-03-21 16:34:06 -0700	[diff] [blame]	1426
				1427	mmput(mm);
				1428	return err;
David Quigley	86c3a76	2006-06-23 02:04:02 -0700	[diff] [blame]	1429
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1430	out:
Christoph Lameter	3268c63	2012-03-21 16:34:06 -0700	[diff] [blame]	1431	put_task_struct(task);
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1432	return err;
				1433	}
Christoph Lameter	742755a	2006-06-23 02:03:55 -0700	[diff] [blame]	1434
Christoph Lameter	7b2259b	2006-06-25 05:46:48 -0700	[diff] [blame]	1435	/*
				1436	* Call migration functions in the vma_ops that may prepare
				1437	* memory in a vm for migration. migration functions may perform
				1438	* the migration for vmas that do not have an underlying page struct.
				1439	*/
				1440	int migrate_vmas(struct mm_struct mm, const nodemask_t to,
				1441	const nodemask_t *from, unsigned long flags)
				1442	{
				1443	struct vm_area_struct *vma;
				1444	int err = 0;
				1445
Daisuke Nishimura	1001c9f	2009-02-11 13:04:18 -0800	[diff] [blame]	1446	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
Christoph Lameter	7b2259b	2006-06-25 05:46:48 -0700	[diff] [blame]	1447	if (vma->vm_ops && vma->vm_ops->migrate) {
				1448	err = vma->vm_ops->migrate(vma, to, from, flags);
				1449	if (err)
				1450	break;
				1451	}
				1452	}
				1453	return err;
				1454	}
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1455
				1456	#ifdef CONFIG_NUMA_BALANCING
				1457	/*
				1458	* Returns true if this is a safe migration target node for misplaced NUMA
				1459	* pages. Currently it only checks the watermarks which crude
				1460	*/
				1461	static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
				1462	int nr_migrate_pages)
				1463	{
				1464	int z;
				1465	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
				1466	struct zone *zone = pgdat->node_zones + z;
				1467
				1468	if (!populated_zone(zone))
				1469	continue;
				1470
				1471	if (zone->all_unreclaimable)
				1472	continue;
				1473
				1474	/* Avoid waking kswapd by allocating pages_to_migrate pages. */
				1475	if (!zone_watermark_ok(zone, 0,
				1476	high_wmark_pages(zone) +
				1477	nr_migrate_pages,
				1478	0, 0))
				1479	continue;
				1480	return true;
				1481	}
				1482	return false;
				1483	}
				1484
				1485	static struct page alloc_misplaced_dst_page(struct page page,
				1486	unsigned long data,
				1487	int **result)
				1488	{
				1489	int nid = (int) data;
				1490	struct page *newpage;
				1491
				1492	newpage = alloc_pages_exact_node(nid,
				1493	(GFP_HIGHUSER_MOVABLE \| GFP_THISNODE \|
				1494	__GFP_NOMEMALLOC \| __GFP_NORETRY \|
				1495	__GFP_NOWARN) &
				1496	~GFP_IOFS, 0);
Hillf Danton	bac0382	2012-11-27 14:46:24 +0000	[diff] [blame]	1497	if (newpage)
				1498	page_xchg_last_nid(newpage, page_last_nid(page));
				1499
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1500	return newpage;
				1501	}
				1502
				1503	/*
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1504	* page migration rate limiting control.
				1505	* Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
				1506	* window of time. Default here says do not migrate more than 1280M per second.
Mel Gorman	e14808b	2012-11-19 10:59:15 +0000	[diff] [blame]	1507	* If a node is rate-limited then PTE NUMA updates are also rate-limited. However
				1508	* as it is faults that reset the window, pte updates will happen unconditionally
				1509	* if there has not been a fault since @pteupdate_interval_millisecs after the
				1510	* throttle window closed.
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1511	*/
				1512	static unsigned int migrate_interval_millisecs __read_mostly = 100;
Mel Gorman	e14808b	2012-11-19 10:59:15 +0000	[diff] [blame]	1513	static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1514	static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
				1515
Mel Gorman	e14808b	2012-11-19 10:59:15 +0000	[diff] [blame]	1516	/* Returns true if NUMA migration is currently rate limited */
				1517	bool migrate_ratelimited(int node)
				1518	{
				1519	pg_data_t *pgdat = NODE_DATA(node);
				1520
				1521	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
				1522	msecs_to_jiffies(pteupdate_interval_millisecs)))
				1523	return false;
				1524
				1525	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
				1526	return false;
				1527
				1528	return true;
				1529	}
				1530
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1531	/* Returns true if the node is migrate rate-limited after the update */
Mel Gorman	d28d4335	2012-11-29 09:24:36 +0000	[diff] [blame]	1532	bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1533	{
				1534	bool rate_limited = false;
				1535
				1536	/*
				1537	* Rate-limit the amount of data that is being migrated to a node.
				1538	* Optimal placement is no good if the memory bus is saturated and
				1539	* all the time is being spent migrating!
				1540	*/
				1541	spin_lock(&pgdat->numabalancing_migrate_lock);
				1542	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
				1543	pgdat->numabalancing_migrate_nr_pages = 0;
				1544	pgdat->numabalancing_migrate_next_window = jiffies +
				1545	msecs_to_jiffies(migrate_interval_millisecs);
				1546	}
				1547	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
				1548	rate_limited = true;
				1549	else
Mel Gorman	d28d4335	2012-11-29 09:24:36 +0000	[diff] [blame]	1550	pgdat->numabalancing_migrate_nr_pages += nr_pages;
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1551	spin_unlock(&pgdat->numabalancing_migrate_lock);
				1552
				1553	return rate_limited;
				1554	}
				1555
				1556	int numamigrate_isolate_page(pg_data_t pgdat, struct page page)
				1557	{
				1558	int ret = 0;
				1559
				1560	/* Avoid migrating to a node that is nearly full */
				1561	if (migrate_balanced_pgdat(pgdat, 1)) {
				1562	int page_lru;
				1563
				1564	if (isolate_lru_page(page)) {
				1565	put_page(page);
				1566	return 0;
				1567	}
				1568
				1569	/* Page is isolated */
				1570	ret = 1;
				1571	page_lru = page_is_file_cache(page);
				1572	if (!PageTransHuge(page))
				1573	inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
				1574	else
				1575	mod_zone_page_state(page_zone(page),
				1576	NR_ISOLATED_ANON + page_lru,
				1577	HPAGE_PMD_NR);
				1578	}
				1579
				1580	/*
				1581	* Page is either isolated or there is not enough space on the target
				1582	* node. If isolated, then it has taken a reference count and the
				1583	* callers reference can be safely dropped without the page
				1584	* disappearing underneath us during migration. Otherwise the page is
				1585	* not to be migrated but the callers reference should still be
				1586	* dropped so it does not leak.
				1587	*/
				1588	put_page(page);
				1589
				1590	return ret;
				1591	}
				1592
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1593	/*
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1594	* Attempt to migrate a misplaced page to the specified destination
				1595	* node. Caller is expected to have an elevated reference count on
				1596	* the page that will be dropped by this function before returning.
				1597	*/
				1598	int migrate_misplaced_page(struct page *page, int node)
				1599	{
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1600	pg_data_t *pgdat = NODE_DATA(node);
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1601	int isolated = 0;
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1602	int nr_remaining;
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1603	LIST_HEAD(migratepages);
				1604
				1605	/*
				1606	* Don't migrate pages that are mapped in multiple processes.
				1607	* TODO: Handle false sharing detection instead of this hammer
				1608	*/
				1609	if (page_mapcount(page) != 1) {
				1610	put_page(page);
				1611	goto out;
				1612	}
				1613
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1614	/*
				1615	* Rate-limit the amount of data that is being migrated to a node.
				1616	* Optimal placement is no good if the memory bus is saturated and
				1617	* all the time is being spent migrating!
				1618	*/
Mel Gorman	d28d4335	2012-11-29 09:24:36 +0000	[diff] [blame]	1619	if (numamigrate_update_ratelimit(pgdat, 1)) {
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1620	put_page(page);
				1621	goto out;
				1622	}
Mel Gorman	a8f6077	2012-11-14 21:41:46 +0000	[diff] [blame]	1623
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1624	isolated = numamigrate_isolate_page(pgdat, page);
				1625	if (!isolated)
				1626	goto out;
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1627
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1628	list_add(&page->lru, &migratepages);
				1629	nr_remaining = migrate_pages(&migratepages,
				1630	alloc_misplaced_dst_page,
				1631	node, false, MIGRATE_ASYNC,
				1632	MR_NUMA_MISPLACED);
				1633	if (nr_remaining) {
				1634	putback_lru_pages(&migratepages);
				1635	isolated = 0;
				1636	} else
				1637	count_vm_numa_event(NUMA_PAGE_MIGRATE);
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1638	BUG_ON(!list_empty(&migratepages));
				1639	out:
				1640	return isolated;
				1641	}
Mel Gorman	220018d	2012-12-05 09:32:56 +0000	[diff] [blame]	1642	#endif /* CONFIG_NUMA_BALANCING */
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1643
Mel Gorman	220018d	2012-12-05 09:32:56 +0000	[diff] [blame]	1644	#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1645	int migrate_misplaced_transhuge_page(struct mm_struct *mm,
				1646	struct vm_area_struct *vma,
				1647	pmd_t *pmd, pmd_t entry,
				1648	unsigned long address,
				1649	struct page *page, int node)
				1650	{
				1651	unsigned long haddr = address & HPAGE_PMD_MASK;
				1652	pg_data_t *pgdat = NODE_DATA(node);
				1653	int isolated = 0;
				1654	struct page *new_page = NULL;
				1655	struct mem_cgroup *memcg = NULL;
				1656	int page_lru = page_is_file_cache(page);
				1657
				1658	/*
				1659	* Don't migrate pages that are mapped in multiple processes.
				1660	* TODO: Handle false sharing detection instead of this hammer
				1661	*/
				1662	if (page_mapcount(page) != 1)
				1663	goto out_dropref;
				1664
				1665	/*
				1666	* Rate-limit the amount of data that is being migrated to a node.
				1667	* Optimal placement is no good if the memory bus is saturated and
				1668	* all the time is being spent migrating!
				1669	*/
Mel Gorman	d28d4335	2012-11-29 09:24:36 +0000	[diff] [blame]	1670	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1671	goto out_dropref;
				1672
				1673	new_page = alloc_pages_node(node,
				1674	(GFP_TRANSHUGE \| GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
Mel Gorman	7548341	2012-11-27 10:31:44 +0000	[diff] [blame]	1675	if (!new_page) {
				1676	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1677	goto out_dropref;
Mel Gorman	7548341	2012-11-27 10:31:44 +0000	[diff] [blame]	1678	}
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1679	page_xchg_last_nid(new_page, page_last_nid(page));
				1680
				1681	isolated = numamigrate_isolate_page(pgdat, page);
Mel Gorman	04fa5d6	2013-01-11 14:31:40 -0800	[diff] [blame]	1682
				1683	/*
				1684	* Failing to isolate or a GUP pin prevents migration. The expected
				1685	* page count is 2. 1 for anonymous pages without a mapping and 1
				1686	* for the callers pin. If the page was isolated, the page will
				1687	* need to be put back on the LRU.
				1688	*/
				1689	if (!isolated \|\| page_count(page) != 2) {
Mel Gorman	7548341	2012-11-27 10:31:44 +0000	[diff] [blame]	1690	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1691	put_page(new_page);
Mel Gorman	04fa5d6	2013-01-11 14:31:40 -0800	[diff] [blame]	1692	if (isolated) {
				1693	putback_lru_page(page);
				1694	isolated = 0;
				1695	goto out;
				1696	}
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1697	goto out_keep_locked;
				1698	}
				1699
				1700	/* Prepare a page as a migration target */
				1701	__set_page_locked(new_page);
				1702	SetPageSwapBacked(new_page);
				1703
				1704	/* anon mapping, we can simply copy page->mapping to the new page: */
				1705	new_page->mapping = page->mapping;
				1706	new_page->index = page->index;
				1707	migrate_page_copy(new_page, page);
				1708	WARN_ON(PageLRU(new_page));
				1709
				1710	/* Recheck the target PMD */
				1711	spin_lock(&mm->page_table_lock);
				1712	if (unlikely(!pmd_same(*pmd, entry))) {
				1713	spin_unlock(&mm->page_table_lock);
				1714
				1715	/* Reverse changes made by migrate_page_copy() */
				1716	if (TestClearPageActive(new_page))
				1717	SetPageActive(page);
				1718	if (TestClearPageUnevictable(new_page))
				1719	SetPageUnevictable(page);
				1720	mlock_migrate_page(page, new_page);
				1721
				1722	unlock_page(new_page);
				1723	put_page(new_page); /* Free it */
				1724
				1725	unlock_page(page);
				1726	putback_lru_page(page);
				1727
				1728	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
				1729	goto out;
				1730	}
				1731
				1732	/*
				1733	* Traditional migration needs to prepare the memcg charge
				1734	* transaction early to prevent the old page from being
				1735	* uncharged when installing migration entries. Here we can
				1736	* save the potential rollback and start the charge transfer
				1737	* only when migration is already known to end successfully.
				1738	*/
				1739	mem_cgroup_prepare_migration(page, new_page, &memcg);
				1740
				1741	entry = mk_pmd(new_page, vma->vm_page_prot);
				1742	entry = pmd_mknonnuma(entry);
				1743	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
				1744	entry = pmd_mkhuge(entry);
				1745
				1746	page_add_new_anon_rmap(new_page, vma, haddr);
				1747
				1748	set_pmd_at(mm, haddr, pmd, entry);
Stephen Rothwell	ce4a9cc	2012-12-10 19:50:57 +1100	[diff] [blame]	1749	update_mmu_cache_pmd(vma, address, &entry);
Mel Gorman	b32967f	2012-11-19 12:35:47 +0000	[diff] [blame]	1750	page_remove_rmap(page);
				1751	/*
				1752	* Finish the charge transaction under the page table lock to
				1753	* prevent split_huge_page() from dividing up the charge
				1754	* before it's fully transferred to the new page.
				1755	*/
				1756	mem_cgroup_end_migration(memcg, page, new_page, true);
				1757	spin_unlock(&mm->page_table_lock);
				1758
				1759	unlock_page(new_page);
				1760	unlock_page(page);
				1761	put_page(page); /* Drop the rmap reference */
				1762	put_page(page); /* Drop the LRU isolation reference */
				1763
				1764	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
				1765	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
				1766
				1767	out:
				1768	mod_zone_page_state(page_zone(page),
				1769	NR_ISOLATED_ANON + page_lru,
				1770	-HPAGE_PMD_NR);
				1771	return isolated;
				1772
				1773	out_dropref:
				1774	put_page(page);
				1775	out_keep_locked:
				1776	return 0;
				1777	}
Peter Zijlstra	7039e1d	2012-10-25 14:16:34 +0200	[diff] [blame]	1778	#endif /* CONFIG_NUMA_BALANCING */
				1779
				1780	#endif /* CONFIG_NUMA */